diff --git a/.clang-format b/.clang-format deleted file mode 100644 index 075810c56ec67d2b12c3554a9cc36d6784c8f885..0000000000000000000000000000000000000000 --- a/.clang-format +++ /dev/null @@ -1,62 +0,0 @@ -Language: Cpp -AccessModifierOffset: -4 -AlignAfterOpenBracket: Align -AllowShortEnumsOnASingleLine: false -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: true -AlignEscapedNewlines: Right -AlignOperands: true -AlignTrailingComments: true -AllowAllParametersOfDeclarationOnNextLine: true -AllowAllArgumentsOnNextLine: true -AllowShortBlocksOnASingleLine: Empty -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Empty -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: true -BinPackArguments: false -BinPackParameters: false -BreakBeforeBinaryOperators: NonAssignment -BreakBeforeBraces: Stroustrup -BreakBeforeTernaryOperators: false -BreakConstructorInitializers: AfterColon -BreakInheritanceList: AfterColon -BreakStringLiterals: false -ColumnLimit: 120 -CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: true -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: true -DerivePointerAlignment: false -FixNamespaceComments: true -IndentCaseLabels: true -IndentPPDirectives: None -IndentWidth: 4 -IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: true -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -PointerAlignment: Left -ReflowComments: true -SortIncludes: true -SortUsingDeclarations: false -SpaceAfterCStyleCast: false -SpaceAfterTemplateKeyword: false -SpaceBeforeAssignmentOperators: true -SpaceBeforeCtorInitializerColon: false -SpaceBeforeInheritanceColon: false -SpaceBeforeParens: ControlStatements -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 -SpacesInAngles: false -SpacesInCStyleCastParentheses: false -SpacesInContainerLiterals: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: c++17 -TabWidth: 4 -UseTab: Never diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md deleted file mode 100644 index 846795ba2b10b195328cc0f00ab5974f677b6e4d..0000000000000000000000000000000000000000 --- a/.github/CONTRIBUTING.md +++ /dev/null @@ -1,234 +0,0 @@ -## Contributing to InternLM - -Welcome to the InternLM community, all kinds of contributions are welcomed, including but not limited to - -**Fix bug** - -You can directly post a Pull Request to fix typo in code or documents - -The steps to fix the bug of code implementation are as follows. - -1. If the modification involve significant changes, you should create an issue first and describe the error information and how to trigger the bug. Other developers will discuss with you and propose an proper solution. - -2. Posting a pull request after fixing the bug and adding corresponding unit test. - -**New Feature or Enhancement** - -1. If the modification involve significant changes, you should create an issue to discuss with our developers to propose an proper design. -2. Post a Pull Request after implementing the new feature or enhancement and add corresponding unit test. - -**Document** - -You can directly post a pull request to fix documents. If you want to add a document, you should first create an issue to check if it is reasonable. - -### Pull Request Workflow - -If you're not familiar with Pull Request, don't worry! 
The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the develop mode of Pull Request, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) - -#### 1. Fork and clone - -If you are posting a pull request for the first time, you should fork the OpenMMLab repositories by clicking the **Fork** button in the top right corner of the GitHub page, and the forked repositories will appear under your GitHub profile. - - - -Then, you can clone the repositories to local: - -```shell -git clone git@github.com:{username}/lmdeploy.git -``` - -After that, you should add official repository as the upstream repository - -```bash -git remote add upstream git@github.com:InternLM/lmdeploy.git -``` - -Check whether remote repository has been added successfully by `git remote -v` - -```bash -origin git@github.com:{username}/lmdeploy.git (fetch) -origin git@github.com:{username}/lmdeploy.git (push) -upstream git@github.com:InternLM/lmdeploy.git (fetch) -upstream git@github.com:InternLM/lmdeploy.git (push) -``` - -> Here's a brief introduction to origin and upstream. When we use "git clone", we create an "origin" remote by default, which points to the repository cloned from. As for "upstream", we add it ourselves to point to the target repository. Of course, if you don't like the name "upstream", you could name it as you wish. Usually, we'll push the code to "origin". If the pushed code conflicts with the latest code in official("upstream"), we should pull the latest code from upstream to resolve the conflicts, and then push to "origin" again. The posted Pull Request will be updated automatically. - -#### 2. Configure pre-commit - -You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of InternLM. **Note**: The following code should be executed under the lmdeploy directory. - -```shell -pip install -U pre-commit -pre-commit install -``` - -Check that pre-commit is configured successfully, and install the hooks defined in `.pre-commit-config.yaml`. - -```shell -pre-commit run --all-files -``` - - - - - -If the installation process is interrupted, you can repeatedly run `pre-commit run ... ` to continue the installation. - -If the code does not conform to the code style specification, pre-commit will raise a warning and fixes some of the errors automatically. - - - -If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option(**only for temporarily commit**). - -```shell -git commit -m "xxx" --no-verify -``` - -#### 3. Create a development branch - -After configuring the pre-commit, we should create a branch based on the master branch to develop the new feature or fix the bug. The proposed branch name is `username/pr_name` - -```shell -git checkout -b yhc/refactor_contributing_doc -``` - -In subsequent development, if the master branch of the local repository is behind the master branch of "upstream", we need to pull the upstream for synchronization, and then execute the above command: - -```shell -git pull upstream master -``` - -#### 4. Commit the code and pass the unit test - -- lmdeploy introduces mypy to do static type checking to increase the robustness of the code. Therefore, we need to add Type Hints to our code and pass the mypy check. 
If you are not familiar with Type Hints, you can refer to [this tutorial](https://docs.python.org/3/library/typing.html). - -- The committed code should pass through the unit test - - ```shell - # Pass all unit tests - pytest tests - - # Pass the unit test of runner - pytest tests/test_runner/test_runner.py - ``` - - If the unit test fails for lack of dependencies, you can install the dependencies referring to the [guidance](#unit-test) - -- If the documents are modified/added, we should check the rendering result referring to [guidance](#document-rendering) - -#### 5. Push the code to remote - -We could push the local commits to remote after passing through the check of unit test and pre-commit. You can associate the local branch with remote branch by adding `-u` option. - -```shell -git push -u origin {branch_name} -``` - -This will allow you to use the `git push` command to push code directly next time, without having to specify a branch or the remote repository. - -#### 6. Create a Pull Request - -(1) Create a pull request in GitHub's Pull request interface - - - -(2) Modify the PR description according to the guidelines so that other developers can better understand your changes - - - -Find more details about Pull Request description in [pull request guidelines](#pr-specs). - -**note** - -(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)) - -(b) If it is your first contribution, please sign the CLA - - - -(c) Check whether the Pull Request pass through the CI - - - -IternLM will run unit test for the posted Pull Request on different platforms (Linux, Window, Mac), based on different versions of Python, PyTorch, CUDA to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code. - -(3) If the Pull Request passes the CI, then you can wait for the review from other developers. You'll modify the code based on the reviewer's comments, and repeat the steps [4](#4-commit-the-code-and-pass-the-unit-test)-[5](#5-push-the-code-to-remote) until all reviewers approve it. Then, we will merge it ASAP. - - - -#### 7. Resolve conflicts - -If your local branch conflicts with the latest master branch of "upstream", you'll need to resolove them. There are two ways to do this: - -```shell -git fetch --all --prune -git rebase upstream/master -``` - -or - -```shell -git fetch --all --prune -git merge upstream/master -``` - -If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are not familiar with `rebase`, then you can use `merge` to resolve conflicts. - -### Guidance - -#### Document rendering - -If the documents are modified/added, we should check the rendering result. We could install the dependencies and run the following command to render the documents and check the results: - -```shell -pip install -r requirements/docs.txt -cd docs/zh_cn/ -# or docs/en -make html -# check file in ./docs/zh_cn/_build/html/index.html -``` - -### Code style - -#### Python - -We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. - -We use the following tools for linting and formatting: - -- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools. 
-- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports. -- [yapf](https://github.com/google/yapf): A formatter for Python files. -- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files. -- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files. -- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring. - -We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, -fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit. -The config for a pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml). - -#### C++ and CUDA - -The clang-format config is stored in [.clang-format](../.clang-format). - -### PR Specs - -1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style - -2. One short-time branch should be matched with only one PR - -3. Accomplish a detailed change in one PR. Avoid large PR - - - Bad: Support Faster R-CNN - - Acceptable: Add a box head to Faster R-CNN - - Good: Add a parameter to box head to support custom conv-layer number - -4. Provide clear and significant commit message - -5. Provide clear and meaningful PR description - - - Task name should be clarified in title. The general format is: \[Prefix\] Short description of the PR (Suffix) - - Prefix: add new feature \[Feature\], fix bug \[Fix\], related to documents \[Docs\], in developing \[WIP\] (which will not be reviewed temporarily) - - Introduce main changes, results and influences on other modules in short description - - Associate related issues and pull requests with a milestone diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.yml b/.github/ISSUE_TEMPLATE/1-bug-report.yml deleted file mode 100644 index 86838836de567f984fc4e0829012a3a702ebb535..0000000000000000000000000000000000000000 --- a/.github/ISSUE_TEMPLATE/1-bug-report.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: 🐞 Bug report -description: Create a report to help us reproduce and fix the bug -title: "[Bug] " -labels: ['Bug'] - -body: -- type: checkboxes - attributes: - label: Checklist - options: - - label: 1. I have searched related issues but cannot get the expected help. - - label: 2. The bug has not been fixed in the latest version. -- type: textarea - attributes: - label: Describe the bug - description: A clear and concise description of what the bug is. - validations: - required: true -- type: textarea - attributes: - label: Reproduction - description: | - 1. What command or script did you run? - placeholder: | - A placeholder for the command. - validations: - required: true -- type: textarea - attributes: - label: Error traceback - description: | - If applicable, paste the error trackback here. - placeholder: Logs and traceback here. - render: Shell -- type: markdown - attributes: - value: > - If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated! - - Thanks for your bug report. We appreciate it a lot. 
diff --git a/.github/ISSUE_TEMPLATE/2-feature-request.yml b/.github/ISSUE_TEMPLATE/2-feature-request.yml deleted file mode 100644 index 976997e14c0b401f7ae58ed9959478880962f62c..0000000000000000000000000000000000000000 --- a/.github/ISSUE_TEMPLATE/2-feature-request.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: πŸš€ Feature request -description: Suggest an idea for this project -title: "[Feature] " - -body: -- type: markdown - attributes: - value: | - We strongly appreciate you creating a PR to implement this feature [here](https://github.com/InternLM/lmdeploy/pulls)! - If you need our help, please fill in as much of the following form as you're able to. - - **The less clear the description, the longer it will take to solve it.** -- type: textarea - attributes: - label: Motivation - description: | - A clear and concise description of the motivation of the feature. - Ex1. It is inconvenient when \[....\]. - validations: - required: true -- type: textarea - attributes: - label: Related resources - description: | - If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful. -- type: textarea - attributes: - label: Additional context - description: | - Add any other context or screenshots about the feature request here. - If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated. diff --git a/.github/ISSUE_TEMPLATE/3-documentation.yml b/.github/ISSUE_TEMPLATE/3-documentation.yml deleted file mode 100644 index b112c2aea6ad2d5fe68fbfec9bfb2ff43da996e7..0000000000000000000000000000000000000000 --- a/.github/ISSUE_TEMPLATE/3-documentation.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: πŸ“š Documentation -description: Report an issue related to the documentation. -labels: "kind/doc,status/unconfirmed" -title: "[Docs] " - -body: -- type: textarea - attributes: - label: πŸ“š The doc issue - description: > - A clear and concise description the issue. - validations: - required: true - -- type: textarea - attributes: - label: Suggest a potential alternative/fix - description: > - Tell us how we could improve the documentation in this regard. -- type: markdown - attributes: - value: > - Thanks for contributing πŸŽ‰! diff --git a/.github/md-link-config.json b/.github/md-link-config.json deleted file mode 100644 index 76986cbd01cf43f1bad7effa03409e0c053c4cca..0000000000000000000000000000000000000000 --- a/.github/md-link-config.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "ignorePatterns": [ - - { - "pattern": "^https://developer.nvidia.com/" - }, - { - "pattern": "^https://docs.openvino.ai/" - }, - { - "pattern": "^https://developer.android.com/" - }, - { - "pattern": "^https://developer.qualcomm.com/" - }, - { - "pattern": "^http://localhost" - } - ], - "httpHeaders": [ - { - "urls": ["https://github.com/", "https://guides.github.com/", "https://help.github.com/", "https://docs.github.com/"], - "headers": { - "Accept-Encoding": "zstd, br, gzip, deflate" - } - } - ], - "timeout": "20s", - "retryOn429": true, - "retryCount": 5, - "fallbackRetryDelay": "30s", - "aliveStatusCodes": [200, 206, 429] -} diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index addb1f6dd8412d8f77683594b4a5551536e6c0d6..0000000000000000000000000000000000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,25 +0,0 @@ -Thanks for your contribution and we appreciate it a lot. 
The following instructions would make your pull request more healthy and more easily receiving feedbacks. If you do not understand some items, don't worry, just make the pull request and seek help from maintainers. - -## Motivation - -Please describe the motivation of this PR and the goal you want to achieve through this PR. - -## Modification - -Please briefly describe what modification is made in this PR. - -## BC-breaking (Optional) - -Does the modification introduce changes that break the backward-compatibility of the downstream repositories? -If so, please describe how it breaks the compatibility and how the downstream projects should modify their code to keep compatibility with this PR. - -## Use cases (Optional) - -If this PR introduces a new feature, it is better to list some use cases here, and update the documentation. - -## Checklist - -1. Pre-commit or other linting tools are used to fix the potential lint issues. -2. The modification is covered by complete unit tests. If not, please add more unit tests to ensure the correctness. -3. If the modification has a dependency on downstream projects of a newer version, this PR should be tested with all supported versions of downstream projects. -4. The documentation has been modified accordingly, like docstring or example tutorials. diff --git a/.github/release.yml b/.github/release.yml deleted file mode 100644 index 4feb7be54a47c72f7e9671bf0d5378cce5e5fb2d..0000000000000000000000000000000000000000 --- a/.github/release.yml +++ /dev/null @@ -1,33 +0,0 @@ -changelog: - categories: - - title: πŸš€ Features - labels: - - feature - - enhancement - - title: πŸ’₯ Improvements - labels: - - improvement - - title: 🐞 Bug fixes - labels: - - bug - - Bug:P0 - - Bug:P1 - - Bug:P2 - - Bug:P3 - - title: πŸ“š Documentations - labels: - - documentation - - title: 🌐 Other - labels: - - '*' - exclude: - labels: - - feature - - enhancement - - improvement - - bug - - documentation - - Bug:P0 - - Bug:P1 - - Bug:P2 - - Bug:P3 diff --git a/.github/scripts/doc_link_checker.py b/.github/scripts/doc_link_checker.py deleted file mode 100644 index 3933360c88f5e1cbe46d93886c4494c165e98409..0000000000000000000000000000000000000000 --- a/.github/scripts/doc_link_checker.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) MegFlow. All rights reserved. 
-# /bin/python3 - -import argparse -import os -import re - - -def make_parser(): - parser = argparse.ArgumentParser('Doc link checker') - parser.add_argument('--http', - default=False, - type=bool, - help='check http or not ') - parser.add_argument('--target', - default='./docs', - type=str, - help='the directory or file to check') - return parser - - -pattern = re.compile(r'\[.*?\]\(.*?\)') - - -def analyze_doc(home, path): - print('analyze {}'.format(path)) - problem_list = [] - code_block = 0 - with open(path) as f: - lines = f.readlines() - for line in lines: - line = line.strip() - if line.startswith('```'): - code_block = 1 - code_block - - if code_block > 0: - continue - - if '[' in line and ']' in line and '(' in line and ')' in line: - all = pattern.findall(line) - for item in all: - # skip ![]() - if item.find('[') == item.find(']') - 1: - continue - - # process the case [text()]() - offset = item.find('](') - if offset == -1: - continue - item = item[offset:] - start = item.find('(') - end = item.find(')') - ref = item[start + 1:end] - - if ref.startswith('http') or ref.startswith('#'): - continue - if '.md#' in ref: - ref = ref[ref.find('#'):] - fullpath = os.path.join(home, ref) - if not os.path.exists(fullpath): - problem_list.append(ref) - else: - continue - if len(problem_list) > 0: - print(f'{path}:') - for item in problem_list: - print(f'\t {item}') - print('\n') - raise Exception('found link error') - - -def traverse(target): - if os.path.isfile(target): - analyze_doc(os.path.dirname(target), target) - return - for home, dirs, files in os.walk(target): - for filename in files: - if filename.endswith('.md'): - path = os.path.join(home, filename) - if os.path.islink(path) is False: - analyze_doc(home, path) - - -if __name__ == '__main__': - args = make_parser().parse_args() - traverse(args.target) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index dc511080783059e5575b2c9d5782c51f1270dbeb..0000000000000000000000000000000000000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: publish-docker - -on: - push: - paths-ignore: - - "!.github/workflows/docker.yml" - - ".github/**" - - "docs/**" - - "resources/**" - - "benchmark/**" - - "tests/**" - - "**/*.md" - branches: - - main - tags: - - "v*.*.*" - -jobs: - publish_docker_image: - runs-on: ubuntu-latest - environment: 'prod' - env: - TAG_PREFIX: "openmmlab/lmdeploy" - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - name: Free disk space - uses: jlumbroso/free-disk-space@main - with: - # This might remove tools that are actually needed, if set to "true" but frees about 6 GB - tool-cache: false - docker-images: false - # All of these default to true, but feel free to set to "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: false - - name: Get docker info - run: | - docker info - - name: Login to Docker Hub - uses: docker/login-action@v2 - with: - username: ${{ secrets.DOCKERHUB_USERNAME }} - password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and push the latest Docker image - run: | - export TAG=$TAG_PREFIX:latest - echo $TAG - docker build docker/ -t ${TAG} --no-cache - docker push $TAG - echo "TAG=${TAG}" >> $GITHUB_ENV - - name: Push docker image with released tag - if: startsWith(github.ref, 'refs/tags/') == true - run: | - export LMDEPLOY_VERSION=$(python3 -c "import sys; sys.path.append('lmdeploy');from version import 
__version__;print(__version__)") - echo $LMDEPLOY_VERSION - export RELEASE_TAG=${TAG_PREFIX}:v${LMDEPLOY_VERSION} - echo $RELEASE_TAG - docker tag $TAG $RELEASE_TAG - docker push $RELEASE_TAG diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index a6f2e6374a41ba21cf2eb7cb5c7554b22ceaae86..0000000000000000000000000000000000000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: lint - -on: [push, pull_request] - -jobs: - lint: - runs-on: ubuntu-20.04 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install pre-commit hook - run: | - python -m pip install pre-commit - pre-commit install - - name: Linting - run: pre-commit run --all-files - - name: Format c/cuda codes with clang-format - uses: DoozyX/clang-format-lint-action@v0.13 - with: - source: src - extensions: h,c,cpp,hpp,cu,cuh - clangFormatVersion: 11 - style: file - - name: Check markdown link - uses: gaurav-nelson/github-action-markdown-link-check@v1 - with: - use-quiet-mode: 'yes' - use-verbose-mode: 'yes' -# check-modified-files-only: 'yes' - config-file: '.github/md-link-config.json' - file-path: './README.md, ./LICENSE, ./README_zh-CN.md' - - name: Check doc link - run: | - python .github/scripts/doc_link_checker.py --target README_zh-CN.md - python .github/scripts/doc_link_checker.py --target README.md - - name: Check docstring coverage - run: | - python -m pip install interrogate - interrogate -v --ignore-init-method --ignore-magic --ignore-module --ignore-private --ignore-nested-functions --ignore-nested-classes --fail-under 80 lmdeploy - - name: Check pylint score - run: | - python -m pip install pylint - pylint lmdeploy diff --git a/.github/workflows/linux-x64-gpu.yml b/.github/workflows/linux-x64-gpu.yml deleted file mode 100644 index d940408ce7536cbdbf082528d00da79c01f81571..0000000000000000000000000000000000000000 --- a/.github/workflows/linux-x64-gpu.yml +++ /dev/null @@ -1,56 +0,0 @@ -name: linux-x64-gpu -on: - push: - paths: - - '.github/workflows/linux-x64-gpu.yml' - - 'src/**' - - 'CMakeLists.txt' - - 'cmake/**' - - 'examples/**' - - '3rdparty/**' - - 'tests/csrc/**' - pull_request: - paths: - - '.github/workflows/linux-x64-gpu.yml' - - 'src/**' - - 'CMakeLists.txt' - - 'cmake/**' - - 'examples/**' - - '3rdparty/**' - - 'tests/csrc/**' -concurrency: - group: linux-x64-gpu-${{ github.ref }} - cancel-in-progress: true -permissions: - contents: read - -jobs: - cuda-118: - runs-on: ubuntu-latest - steps: - - name: Free disk space - uses: jlumbroso/free-disk-space@main - with: - # This might remove tools that are actually needed, if set to "true" but frees about 6 GB - tool-cache: false - docker-images: false - # All of these default to true, but feel free to set to "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: false - - name: Checkout repository - uses: actions/checkout@v3 - - name: Build - uses: addnab/docker-run-action@v3 - with: - image: openmmlab/lmdeploy-builder:cuda11.8 - options: -v ${{ github.workspace }}:/work --cpus=1.8 - run: | - cd /work - source /opt/conda/bin/activate - conda activate py38 - mkdir build && cd build - bash ../generate.sh - make -j$(nproc) && make install diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml deleted file mode 100644 index 7c56e08f7d4ac0009bd96ac73fa73dd1a3437aa6..0000000000000000000000000000000000000000 --- 
a/.github/workflows/pypi.yml +++ /dev/null @@ -1,109 +0,0 @@ -name: publish to pypi - -on: - push: - branches: - - main - paths: - - "lmdeploy/version.py" - workflow_dispatch: - - -jobs: - linux-build: - strategy: - matrix: - pyver: [py38, py39, py310, py311] - runs-on: ubuntu-latest - env: - PYTHON_VERSION: ${{ matrix.pyver }} - PLAT_NAME: manylinux2014_x86_64 - DOCKER_TAG: cuda11.8 - OUTPUT_FOLDER: cuda11.8_dist - steps: - - name: Free disk space - uses: jlumbroso/free-disk-space@main - with: - # This might remove tools that are actually needed, if set to "true" but frees about 6 GB - tool-cache: false - docker-images: false - # All of these default to true, but feel free to set to "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - swap-storage: false - - name: Checkout repository - uses: actions/checkout@v3 - - name: Build - run: | - echo ${PYTHON_VERSION} - echo ${PLAT_NAME} - echo ${DOCKER_TAG} - echo ${OUTPUT_FOLDER} - # remove -it - sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh - bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER} - - name: Upload Artifacts - uses: actions/upload-artifact@v3 - with: - if-no-files-found: error - path: builder/manywheel/${{ env.OUTPUT_FOLDER }}/* - retention-days: 1 - - windows-build: - strategy: - matrix: - pyver: ['3.8', '3.9', '3.10', '3.11'] - runs-on: windows-latest - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - name: Set up python - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.pyver }} - - name: Install python packages - run: | - pip install pybind11 wheel - - uses: Jimver/cuda-toolkit@v0.2.11 - id: cuda-toolkit - with: - cuda: '11.8.0' - use-github-cache: false - - name: Build wheel - run: | - mkdir build - cd build - ..\builder\windows\generate.ps1 - cmake --build . --config Release -- /m > build.log.txt - cmake --install . --config Release - cd .. - rm build -Force -Recurse - python setup.py bdist_wheel -d build/wheel - - name: Upload Artifacts - uses: actions/upload-artifact@v3 - with: - if-no-files-found: error - path: build/wheel/* - retention-days: 1 - - publish: - runs-on: ubuntu-latest - environment: 'prod' - needs: - - linux-build - - windows-build - steps: - - name: Download artifacts - uses: actions/download-artifact@v3 - - name: Display artifacts - run: ls artifact/ -lh - - name: Set up python3.8 - uses: actions/setup-python@v4 - with: - python-version: '3.8' - - name: Upload to pypi - run: | - pip install twine - twine upload artifact/* -u __token__ -p ${{ secrets.pypi_password }} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index 93df0cd47c7ddef6870ae3e4d0e0acfd55f792a6..0000000000000000000000000000000000000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: 'Close stale issues and PRs' - -on: - schedule: - # check issue and pull request once at 01:30 a.m. every day - - cron: '30 1 * * *' - -permissions: - contents: read - -jobs: - stale: - permissions: - issues: write - pull-requests: write - runs-on: ubuntu-latest - steps: - - uses: actions/stale@v7 - with: - stale-issue-message: 'This issue is marked as stale because it has been marked as invalid or awaiting response for 7 days without any further response. It will be closed in 5 days if the stale label is not removed or if there is no further response.' 
- stale-pr-message: 'This PR is marked as stale because there has been no activity in the past 45 days. It will be closed in 10 days if the stale label is not removed or if there is no further updates.' - close-issue-message: 'This issue is closed because it has been stale for 5 days. Please open a new issue if you have similar issues or you have any new updates now.' - close-pr-message: 'This PR is closed because it has been stale for 10 days. Please reopen this PR if you have any updates and want to keep contributing the code.' - # only issues/PRS with following labels are checked - any-of-labels: 'invalid, awaiting response, duplicate' - days-before-issue-stale: 7 - days-before-pr-stale: 45 - days-before-issue-close: 5 - days-before-pr-close: 10 - # automatically remove the stale label when the issues or the pull requests are updated or commented - remove-stale-when-updated: true - operations-per-run: 50 diff --git a/.github/workflows/windows-x64-gpu.yml b/.github/workflows/windows-x64-gpu.yml deleted file mode 100644 index 93839cfb89b07d47e80fb6d30305028caf92e93a..0000000000000000000000000000000000000000 --- a/.github/workflows/windows-x64-gpu.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: windows-x64-gpu -on: - push: - paths: - - '.github/workflows/windows-x64-gpu.yml' - - 'src/**' - - 'CMakeLists.txt' - - 'cmake/**' - - 'examples/**' - - '3rdparty/**' - - 'tests/csrc/**' - pull_request: - paths: - - '.github/workflows/windows-x64-gpu.yml' - - 'src/**' - - 'CMakeLists.txt' - - 'cmake/**' - - 'examples/**' - - '3rdparty/**' - - 'tests/csrc/**' -concurrency: - group: windows-x64-gpu-${{ github.ref }} - cancel-in-progress: true -permissions: - contents: read - -jobs: - cuda-118: - runs-on: windows-latest - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - name: Set up python - uses: actions/setup-python@v4 - with: - python-version: '3.8' - - name: Install python packages - run: | - pip install pybind11 wheel - - uses: Jimver/cuda-toolkit@v0.2.11 - id: cuda-toolkit - with: - cuda: '11.8.0' - use-github-cache: false - - name: Build wheel - run: | - ((Get-Content -path CMakeLists.txt -Raw) -replace '-Wall','/W0') | Set-Content CMakeLists.txt - $env:BUILD_TEST="ON" - mkdir build - cd build - ..\builder\windows\generate.ps1 - cmake --build . --config Release -- /m /v:q - if (-Not $?) { - echo "build failed" - exit 1 - } - cmake --install . --config Release - cd .. - rm build -Force -Recurse - python setup.py bdist_wheel -d build/wheel diff --git a/.gitignore b/.gitignore deleted file mode 100644 index ccfad036dcd04f55b0eac63fb40377e15509f409..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,74 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class -.vscode/ -.idea/ -# C extensions -*.so - -# Distribution / packaging -.Python -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -*build*/ -!builder/ -lmdeploy/lib/ -lmdeploy/bin/ -dist/ -examples/cpp/llama/*.csv -*.npy -*.weight - -# LMDeploy -workspace/ -work_dir*/ - -# Huggingface -*.bin -*config.json -*generate_config.json - -# Pytorch -*.pth -*.py~ -*.sh~ -*.pyc -**/src/pytorch-sphinx-theme/ - -# Outputs and logs -*.txt -*.log -*.out -*.csv -*.pkl diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index fb5f4d1b36481da818614029b8fa51c696dfe9d8..0000000000000000000000000000000000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,53 +0,0 @@ -repos: - - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: ["--exclude=lmdeploy/turbomind/triton_models/*"] - - repo: https://github.com/PyCQA/isort - rev: 5.11.5 - hooks: - - id: isort - - repo: https://github.com/pre-commit/mirrors-yapf - rev: v0.32.0 - hooks: - - id: yapf - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.2.0 - hooks: - - id: trailing-whitespace - - id: check-yaml - - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: double-quote-string-fixer - - id: check-merge-conflict - - id: fix-encoding-pragma - args: ["--remove"] - - id: mixed-line-ending - args: ["--fix=lf"] - - repo: https://github.com/executablebooks/mdformat - rev: 0.7.9 - hooks: - - id: mdformat - args: ["--number"] - additional_dependencies: - - mdformat-openmmlab - - mdformat_frontmatter - - linkify-it-py - - repo: https://github.com/codespell-project/codespell - rev: v2.1.0 - hooks: - - id: codespell - args: ["--skip=third_party/*,*.ipynb,*.proto"] - - - repo: https://github.com/myint/docformatter - rev: v1.4 - hooks: - - id: docformatter - args: ["--in-place", "--wrap-descriptions", "79"] - - - repo: https://github.com/open-mmlab/pre-commit-hooks - rev: v0.2.0 - hooks: - - id: check-copyright - args: ["lmdeploy"] diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index d46a1c7f31f05969869ec52d7508f70130d40e32..0000000000000000000000000000000000000000 --- a/.pylintrc +++ /dev/null @@ -1,625 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-whitelist= - -# Specify a score threshold to be exceeded before program exits with error. -fail-under=8.5 - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS,configs - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). -#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python module names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. 
-persistent=yes - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, - raw-checker-failed, - bad-inline-option, - locally-disabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - use-symbolic-message-instead, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - deprecated-sys-function, - exception-escape, - comprehension-escape, - no-member, - invalid-name, - too-many-branches, - wrong-import-order, - too-many-arguments, - missing-function-docstring, - missing-module-docstring, - too-many-locals, - too-few-public-methods, - abstract-method, - broad-except, - too-many-nested-blocks, - too-many-instance-attributes, - missing-class-docstring, - duplicate-code, - not-callable, - protected-access, - dangerous-default-value, - no-name-in-module, - logging-fstring-interpolation, - super-init-not-called, - redefined-builtin, - attribute-defined-outside-init, - arguments-differ, - cyclic-import, - bad-super-call, - too-many-statements, - unused-argument, - import-outside-toplevel, - import-error, - 
super-with-arguments - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a score less than or equal to 10. You -# have access to the variables 'error', 'warning', 'refactor', and 'convention' -# which contain the number of messages in each category, as well as 'statement' -# which is the total number of statements analyzed. This score is used by the -# global evaluation report (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=yes - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members= - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. 
-ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - -# List of decorators that change the signature of a decorated function. -signature-mutators= - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it work, -# install the python-enchant package. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains the private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to the private dictionary (see the -# --spelling-private-dict-file option) instead of raising a message. -spelling-store-unknown-words=no - - -[LOGGING] - -# The type of string formatting that logging methods do. `old` means using % -# formatting, `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=100 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[STRING] - -# This flag controls whether inconsistent-quotes generates a warning when the -# character used as a quote delimiter is used inconsistently within a module. 
-check-quote-consistency=no - -# This flag controls whether the implicit-str-concat should generate a warning -# on implicit string concatenation in sequences defined over several lines. -check-str-concat-over-line-jumps=no - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - -# Regular expression of note tags to take in consideration. -#notes-rgx= - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -#argument-rgx= - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names=foo, - bar, - baz, - toto, - tutu, - tata - -# Bad variable names regexes, separated by a comma. If names match any regex, -# they will always be refused -bad-names-rgxs= - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=i, - j, - k, - ex, - Run, - _, - x, - y, - w, - h, - a, - b - -# Good variable names regexes, separated by a comma. If names match any regex, -# they will always be accepted -good-names-rgxs= - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. 
-no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. -#variable-rgx= - - -[DESIGN] - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement (see R0916). -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=7 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[IMPORTS] - -# List of modules that can be imported at any level, not just the top level -# one. -allow-any-import-level= - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled). -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled). -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - -# Couples of modules and preferred modules, separated by a comma. -preferred-modules= - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp, - __post_init__ - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". 
-overgeneral-exceptions=BaseException, - Exception diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 05ec15cca325e1486907dcf6774b83f4388c6cc7..0000000000000000000000000000000000000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,13 +0,0 @@ -version: 2 - -formats: all - -build: - os: "ubuntu-22.04" - tools: - python: "3.8" - -python: - install: - - requirements: requirements/docs.txt - - requirements: requirements/readthedocs.txt diff --git a/3rdparty/INIReader.h b/3rdparty/INIReader.h deleted file mode 100644 index 6ed9b5a5aa0bed583811babe8d816178e512feef..0000000000000000000000000000000000000000 --- a/3rdparty/INIReader.h +++ /dev/null @@ -1,501 +0,0 @@ -// Read an INI file into easy-to-access name/value pairs. - -// inih and INIReader are released under the New BSD license. -// Go to the project home page for more info: -// -// https://github.com/benhoyt/inih (Initial repo) -// https://github.com/jtilly/inih (The reference of this header file) -/* inih -- simple .INI file parser -inih is released under the New BSD license (see LICENSE.txt). Go to the project -home page for more info: -https://github.com/benhoyt/inih -https://github.com/jtilly/inih -*/ - -#ifndef __INI_H__ -#define __INI_H__ - -/* Make this header file easier to include in C++ code */ -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* Typedef for prototype of handler function. */ -typedef int (*ini_handler)(void* user, const char* section, - const char* name, const char* value); - -/* Typedef for prototype of fgets-style reader function. */ -typedef char* (*ini_reader)(char* str, int num, void* stream); - -/* Parse given INI-style file. May have [section]s, name=value pairs - (whitespace stripped), and comments starting with ';' (semicolon). Section - is "" if name=value pair parsed before any section heading. name:value - pairs are also supported as a concession to Python's configparser. - For each name=value pair parsed, call handler function with given user - pointer as well as section, name, and value (data only valid for duration - of handler call). Handler should return nonzero on success, zero on error. - Returns 0 on success, line number of first error on parse error (doesn't - stop on first error), -1 on file open error, or -2 on memory allocation - error (only when INI_USE_STACK is zero). -*/ -int ini_parse(const char* filename, ini_handler handler, void* user); - -/* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't - close the file when it's finished -- the caller must do that. */ -int ini_parse_file(FILE* file, ini_handler handler, void* user); - -/* Same as ini_parse(), but takes an ini_reader function pointer instead of - filename. Used for implementing custom or string-based I/O. */ -int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler, - void* user); - -/* Nonzero to allow multi-line value parsing, in the style of Python's - configparser. If allowed, ini_parse() will call the handler with the same - name for each subsequent line parsed. */ -#ifndef INI_ALLOW_MULTILINE -#define INI_ALLOW_MULTILINE 1 -#endif - -/* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of - the file. See http://code.google.com/p/inih/issues/detail?id=21 */ -#ifndef INI_ALLOW_BOM -#define INI_ALLOW_BOM 1 -#endif - -/* Nonzero to allow inline comments (with valid inline comment characters - specified by INI_INLINE_COMMENT_PREFIXES). Set to 0 to turn off and match - Python 3.2+ configparser behaviour. 
*/ -#ifndef INI_ALLOW_INLINE_COMMENTS -#define INI_ALLOW_INLINE_COMMENTS 1 -#endif -#ifndef INI_INLINE_COMMENT_PREFIXES -#define INI_INLINE_COMMENT_PREFIXES ";" -#endif - -/* Nonzero to use stack, zero to use heap (malloc/free). */ -#ifndef INI_USE_STACK -#define INI_USE_STACK 1 -#endif - -/* Stop parsing on first error (default is to keep parsing). */ -#ifndef INI_STOP_ON_FIRST_ERROR -#define INI_STOP_ON_FIRST_ERROR 0 -#endif - -/* Maximum line length for any line in INI file. */ -#ifndef INI_MAX_LINE -#define INI_MAX_LINE 200 -#endif - -#ifdef __cplusplus -} -#endif - -/* inih -- simple .INI file parser -inih is released under the New BSD license (see LICENSE.txt). Go to the project -home page for more info: -https://github.com/benhoyt/inih -*/ - -#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) -#define _CRT_SECURE_NO_WARNINGS -#endif - -#include -#include -#include - -#if !INI_USE_STACK -#include -#endif - -#define MAX_SECTION 50 -#define MAX_NAME 50 - -/* Strip whitespace chars off end of given string, in place. Return s. */ -inline static char* rstrip(char* s) -{ - char* p = s + strlen(s); - while (p > s && isspace((unsigned char)(*--p))) - *p = '\0'; - return s; -} - -/* Return pointer to first non-whitespace char in given string. */ -inline static char* lskip(const char* s) -{ - while (*s && isspace((unsigned char)(*s))) - s++; - return (char*)s; -} - -/* Return pointer to first char (of chars) or inline comment in given string, - or pointer to null at end of string if neither found. Inline comment must - be prefixed by a whitespace character to register as a comment. */ -inline static char* find_chars_or_comment(const char* s, const char* chars) -{ -#if INI_ALLOW_INLINE_COMMENTS - int was_space = 0; - while (*s && (!chars || !strchr(chars, *s)) && - !(was_space && strchr(INI_INLINE_COMMENT_PREFIXES, *s))) { - was_space = isspace((unsigned char)(*s)); - s++; - } -#else - while (*s && (!chars || !strchr(chars, *s))) { - s++; - } -#endif - return (char*)s; -} - -/* Version of strncpy that ensures dest (size bytes) is null-terminated. */ -inline static char* strncpy0(char* dest, const char* src, size_t size) -{ - strncpy(dest, src, size); - dest[size - 1] = '\0'; - return dest; -} - -/* See documentation in header file. 
*/ -inline int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler, - void* user) -{ - /* Uses a fair bit of stack (use heap instead if you need to) */ -#if INI_USE_STACK - char line[INI_MAX_LINE]; -#else - char* line; -#endif - char section[MAX_SECTION] = ""; - char prev_name[MAX_NAME] = ""; - - char* start; - char* end; - char* name; - char* value; - int lineno = 0; - int error = 0; - -#if !INI_USE_STACK - line = (char*)malloc(INI_MAX_LINE); - if (!line) { - return -2; - } -#endif - - /* Scan through stream line by line */ - while (reader(line, INI_MAX_LINE, stream) != NULL) { - lineno++; - - start = line; -#if INI_ALLOW_BOM - if (lineno == 1 && (unsigned char)start[0] == 0xEF && - (unsigned char)start[1] == 0xBB && - (unsigned char)start[2] == 0xBF) { - start += 3; - } -#endif - start = lskip(rstrip(start)); - - if (*start == ';' || *start == '#') { - /* Per Python configparser, allow both ; and # comments at the - start of a line */ - } -#if INI_ALLOW_MULTILINE - else if (*prev_name && *start && start > line) { - -#if INI_ALLOW_INLINE_COMMENTS - end = find_chars_or_comment(start, NULL); - if (*end) - *end = '\0'; - rstrip(start); -#endif - - /* Non-blank line with leading whitespace, treat as continuation - of previous name's value (as per Python configparser). */ - if (!handler(user, section, prev_name, start) && !error) - error = lineno; - } -#endif - else if (*start == '[') { - /* A "[section]" line */ - end = find_chars_or_comment(start + 1, "]"); - if (*end == ']') { - *end = '\0'; - strncpy0(section, start + 1, sizeof(section)); - *prev_name = '\0'; - } - else if (!error) { - /* No ']' found on section line */ - error = lineno; - } - } - else if (*start) { - /* Not a comment, must be a name[=:]value pair */ - end = find_chars_or_comment(start, "=:"); - if (*end == '=' || *end == ':') { - *end = '\0'; - name = rstrip(start); - value = lskip(end + 1); -#if INI_ALLOW_INLINE_COMMENTS - end = find_chars_or_comment(value, NULL); - if (*end) - *end = '\0'; -#endif - rstrip(value); - - /* Valid name[=:]value pair found, call handler */ - strncpy0(prev_name, name, sizeof(prev_name)); - if (!handler(user, section, name, value) && !error) - error = lineno; - } - else if (!error) { - /* No '=' or ':' found on name[=:]value line */ - error = lineno; - } - } - -#if INI_STOP_ON_FIRST_ERROR - if (error) - break; -#endif - } - -#if !INI_USE_STACK - free(line); -#endif - - return error; -} - -/* See documentation in header file. */ -inline int ini_parse_file(FILE* file, ini_handler handler, void* user) -{ - return ini_parse_stream((ini_reader)fgets, file, handler, user); -} - -/* See documentation in header file. */ -inline int ini_parse(const char* filename, ini_handler handler, void* user) -{ - FILE* file; - int error; - - file = fopen(filename, "r"); - if (!file) - return -1; - error = ini_parse_file(file, handler, user); - fclose(file); - return error; -} - -#endif /* __INI_H__ */ - - -#ifndef __INIREADER_H__ -#define __INIREADER_H__ - -#include -#include -#include - -// Read an INI file into easy-to-access name/value pairs. (Note that I've gone -// for simplicity here rather than speed, but it should be pretty decent.) -class INIReader -{ -public: - // Empty Constructor - INIReader() {}; - - // Construct INIReader and parse given filename. See ini.h for more info - // about the parsing. - INIReader(std::string filename); - - // Construct INIReader and parse given file. See ini.h for more info - // about the parsing. 
- INIReader(FILE *file); - ~INIReader(); - // Return the result of ini_parse(), i.e., 0 on success, line number of - // first error on parse error, or -1 on file open error. - int ParseError() const; - - // Return the list of sections found in ini file - const std::set& Sections() const; - - // Get a string value from INI file, returning default_value if not found. - std::string Get(std::string section, std::string name, - std::string default_value) const; - std::string Get(std::string section, std::string name) const; - - // Get an integer (long) value from INI file, returning default_value if - // not found or not a valid integer (decimal "1234", "-1234", or hex "0x4d2"). - long GetInteger(std::string section, std::string name, long default_value) const; - long GetInteger(std::string section, std::string name) const; - - // Get a real (floating point double) value from INI file, returning - // default_value if not found or not a valid floating point value - // according to strtod(). - double GetReal(std::string section, std::string name, double default_value) const; - - // Get a single precision floating point number value from INI file, returning - // default_value if not found or not a valid floating point value - // according to strtof(). - float GetFloat(std::string section, std::string name, float default_value) const; - float GetFloat(std::string section, std::string name) const; - - // Get a boolean value from INI file, returning default_value if not found or if - // not a valid true/false value. Valid true values are "true", "yes", "on", "1", - // and valid false values are "false", "no", "off", "0" (not case sensitive). - bool GetBoolean(std::string section, std::string name, bool default_value) const; - -protected: - int _error; - std::map _values; - std::set _sections; - static std::string MakeKey(std::string section, std::string name); - static int ValueHandler(void* user, const char* section, const char* name, - const char* value); -}; - -#endif // __INIREADER_H__ - - -#ifndef __INIREADER__ -#define __INIREADER__ - -#include -#include -#include - -inline INIReader::INIReader(std::string filename) -{ - _error = ini_parse(filename.c_str(), ValueHandler, this); -} - -inline INIReader::INIReader(FILE *file) -{ - _error = ini_parse_file(file, ValueHandler, this); -} - -inline int INIReader::ParseError() const -{ - return _error; -} - -inline INIReader::~INIReader() { } - -inline const std::set& INIReader::Sections() const -{ - return _sections; -} - -inline std::string INIReader::Get(std::string section, std::string name, std::string default_value) const -{ - std::string key = MakeKey(section, name); - return _values.count(key) ? _values.at(key) : default_value; -} - -inline std::string INIReader::Get(std::string section, std::string name) const -{ - std::string key = MakeKey(section, name); - if(_values.count(key)) return _values.at(key); - else - { - printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str()); - exit(-1); - } -} - -inline long INIReader::GetInteger(std::string section, std::string name, long default_value) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - // This parses "1234" (decimal) and also "0x4D2" (hex) - long n = strtol(value, &end, 0); - return end > value ? 
n : default_value; -} - -inline long INIReader::GetInteger(std::string section, std::string name) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - // This parses "1234" (decimal) and also "0x4D2" (hex) - long n = strtol(value, &end, 0); - if(end <= value) - { - printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str()); - exit(-1); - } - return n; -} - -inline double INIReader::GetReal(std::string section, std::string name, double default_value) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - double n = strtod(value, &end); - return end > value ? n : default_value; -} - -inline float INIReader::GetFloat(std::string section, std::string name, float default_value) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - float n = strtof(value, &end); - return end > value ? n : default_value; -} - -inline float INIReader::GetFloat(std::string section, std::string name) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - float n = strtof(value, &end); - if(end <= value) - { - printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str()); - exit(-1); - } - return n; -} - -inline bool INIReader::GetBoolean(std::string section, std::string name, bool default_value) const -{ - std::string valstr = Get(section, name, ""); - // Convert to lower case to make string comparisons case-insensitive - std::transform(valstr.begin(), valstr.end(), valstr.begin(), ::tolower); - if (valstr == "true" || valstr == "yes" || valstr == "on" || valstr == "1") - return true; - else if (valstr == "false" || valstr == "no" || valstr == "off" || valstr == "0") - return false; - else - return default_value; -} - -inline std::string INIReader::MakeKey(std::string section, std::string name) -{ - std::string key = section + "=" + name; - // Convert to lower case to make section/name lookups case-insensitive - std::transform(key.begin(), key.end(), key.begin(), ::tolower); - return key; -} - -inline int INIReader::ValueHandler(void* user, const char* section, const char* name, - const char* value) -{ - INIReader* reader = (INIReader*)user; - std::string key = MakeKey(section, name); - if (reader->_values[key].size() > 0) - reader->_values[key] += "\n"; - reader->_values[key] += value; - reader->_sections.insert(section); - return 1; -} - -#endif // __INIREADER__ diff --git a/3rdparty/backend-r22.12/.clang-format b/3rdparty/backend-r22.12/.clang-format deleted file mode 100644 index 98c649734c29e0b1d134dae65be9bc08a14b4ba5..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/.clang-format +++ /dev/null @@ -1,37 +0,0 @@ ---- -BasedOnStyle: Google - -IndentWidth: 2 -ContinuationIndentWidth: 4 -UseTab: Never -MaxEmptyLinesToKeep: 2 - -SortIncludes: true -CompactNamespaces: true -ReflowComments: true - -DerivePointerAlignment: false -PointerAlignment: Left - -AllowShortIfStatementsOnASingleLine: false -AllowShortBlocksOnASingleLine: false -AllowShortFunctionsOnASingleLine: Inline - -AlwaysBreakAfterReturnType: TopLevelDefinitions -AlignAfterOpenBracket: AlwaysBreak -BreakBeforeBraces: Custom -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterNamespace: false - AfterStruct: false - AfterUnion: false - BeforeCatch: true - -BinPackArguments: true 
-BinPackParameters: true -ConstructorInitializerAllOnOneLineOrOnePerLine: false - -IndentCaseLabels: true \ No newline at end of file diff --git a/3rdparty/backend-r22.12/.gitignore b/3rdparty/backend-r22.12/.gitignore deleted file mode 100644 index 0e9f099a2eef4742716637e3cce3a45f7053b021..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/build -/.vscode -*.so diff --git a/3rdparty/backend-r22.12/CMakeLists.txt b/3rdparty/backend-r22.12/CMakeLists.txt deleted file mode 100644 index 983a5debc10d4afc8b6157f3b8c3802503145a08..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/CMakeLists.txt +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
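Before continuing with the Triton backend build files, here is a brief usage sketch for the `INIReader` class removed above. It is only an illustration: the `config.ini` file name, the `[model]` section, and the key names are hypothetical, and only the accessors that take a default value are used so that a missing key does not terminate the process. It assumes the header is available on the include path as `INIReader.h`.

```
// Hypothetical usage of the INIReader class defined in 3rdparty/INIReader.h.
#include <cstdio>
#include <string>

#include "INIReader.h"

int main()
{
    INIReader reader("config.ini");  // the file is parsed at construction time

    // ParseError() is 0 on success, the line number of the first parse
    // error, or -1 if the file could not be opened.
    if (reader.ParseError() != 0) {
        std::printf("failed to load config.ini (code %d)\n", reader.ParseError());
        return 1;
    }

    // Lookups are case-insensitive; the default is returned when a key is missing.
    std::string name = reader.Get("model", "name", "unknown");
    long batch       = reader.GetInteger("model", "max_batch_size", 1);
    bool fp16        = reader.GetBoolean("model", "enable_fp16", false);

    std::printf("name=%s max_batch_size=%ld fp16=%d\n", name.c_str(), batch, fp16);
    return 0;
}
```

Note that the overloads without a default value (`Get(section, name)`, `GetInteger(section, name)`, `GetFloat(section, name)`) print an error and call `exit(-1)` when the key is absent, so the defaulted variants are usually the safer choice for optional settings.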
- -#cmake_minimum_required(VERSION 3.17) -cmake_minimum_required(VERSION 3.16) - -project(tritonbackend LANGUAGES C CXX) - -# -# Options -# -option(TRITON_ENABLE_GPU "Enable GPU support in backend utilities" ON) -option(TRITON_ENABLE_MALI_GPU "Enable Arm MALI GPU support in backend utilities" OFF) -option(TRITON_ENABLE_STATS "Include statistics collections in backend utilities" ON) - -set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") -set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") - -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE Release) -endif() - -# -# Dependencies -# -include(FetchContent) - -FetchContent_Declare( - repo-common - GIT_REPOSITORY https://github.com/triton-inference-server/common.git - GIT_TAG ${TRITON_COMMON_REPO_TAG} - GIT_SHALLOW ON -) -FetchContent_Declare( - repo-core - GIT_REPOSITORY https://github.com/triton-inference-server/core.git - GIT_TAG ${TRITON_CORE_REPO_TAG} - GIT_SHALLOW ON -) -FetchContent_MakeAvailable(repo-common repo-core) - -# -# CUDA -# -if(${TRITON_ENABLE_GPU}) - #find_package(CUDAToolkit REQUIRED) - find_package(CUDA REQUIRED) - message(STATUS "Using CUDA ${CUDA_VERSION}") - set(CUDA_NVCC_FLAGS -std=c++11) - - if(CUDA_VERSION VERSION_GREATER "10.1" OR CUDA_VERSION VERSION_EQUAL "10.1") - add_definitions(-DTRITON_ENABLE_CUDA_GRAPH=1) - else() - message(WARNING "CUDA ${CUDA_VERSION} does not support CUDA graphs.") - endif() -endif() # TRITON_ENABLE_GPU - -# -# Backend library containing useful source and utilities -# -set(SRC_FILES - "src/backend_common.cc" - "src/backend_input_collector.cc" - "src/backend_memory.cc" - "src/backend_model_instance.cc" - "src/backend_model.cc" - "src/backend_output_responder.cc" -) - -if(${TRITON_ENABLE_GPU}) - set(SRC_FILES ${SRC_FILES} "src/kernel.h") -endif() # TRITON_ENABLE_GPU - -add_library( - triton-backend-utils - ${SRC_FILES} -) - -if(${TRITON_ENABLE_GPU}) - set(HOST_COMPILER_FLAGS "") - if (WIN32) - set(HOST_COMPILER_FLAGS "/MD") - else() - set(HOST_COMPILER_FLAGS "-fPIC") - endif() - - set(CUDA_LIBRARIES PUBLIC ${CUDA_LIBRARIES}) - cuda_add_library( - kernel-library-new - src/kernel.cu src/kernel.h - OPTIONS -arch compute_53 - OPTIONS -code compute_53,sm_53,sm_60,sm_61,sm_62,sm_70,sm_72,sm_75 - OPTIONS -Xcompiler ${HOST_COMPILER_FLAGS} - ) -endif() # TRITON_ENABLE_GPU - -add_library( - TritonBackend::triton-backend-utils ALIAS triton-backend-utils -) - -target_include_directories( - triton-backend-utils - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - message("Using MSVC as compiler, default target on Windows 10. 
" - "If the target system is not Windows 10, please update _WIN32_WINNT " - "to corresponding value.") -endif() -target_compile_features(triton-backend-utils PRIVATE cxx_std_11) -target_compile_options( - triton-backend-utils - PRIVATE - $<$,$,$>: - -Wall -Wextra -Wno-unused-parameter -Werror> - $<$:/Wall /D_WIN32_WINNT=0x0A00 /EHsc> -) - -# TRITON_ENABLE_GPU exposed in header so set PUBLIC -if(${TRITON_ENABLE_GPU}) -target_compile_definitions( - triton-backend-utils - PUBLIC TRITON_ENABLE_GPU=1 -) -endif() # TRITON_ENABLE_GPU - -# TRITON_ENABLE_MALI_GPU exposed in header so set PUBLIC -if(${TRITON_ENABLE_MALI_GPU}) -target_compile_definitions( - triton-backend-utils - PUBLIC TRITON_ENABLE_MALI_GPU=1 -) -endif() # TRITON_ENABLE_MALI_GPU - -# TRITON_ENABLE_STATS exposed in header so set PUBLIC -if(${TRITON_ENABLE_STATS}) -target_compile_definitions( - triton-backend-utils - PUBLIC TRITON_ENABLE_STATS=1 -) -endif() # TRITON_ENABLE_STATS - -set_target_properties( - triton-backend-utils PROPERTIES - WINDOWS_EXPORT_ALL_SYMBOLS TRUE - POSITION_INDEPENDENT_CODE ON - OUTPUT_NAME tritonbackendutils -) - -target_link_libraries( - triton-backend-utils - PUBLIC - triton-core-backendapi # from repo-core - triton-core-serverapi # from repo-core - triton-common-async-work-queue # from repo-common - triton-common-json # from repo-common -) - -if(${TRITON_ENABLE_GPU}) - target_link_libraries( - triton-backend-utils - PUBLIC - #CUDA::cudart - cudart - PRIVATE - kernel-library-new - ) -endif() # TRITON_ENABLE_GPU - -# -# Install -# -include(GNUInstallDirs) -set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonBackend) - -install( - TARGETS - triton-backend-utils - EXPORT - triton-backend-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} -) - -if(${TRITON_ENABLE_GPU}) - install( - TARGETS - kernel-library-new - EXPORT - triton-backend-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) -endif() # TRITON_ENABLE_GPU - -install( - DIRECTORY include/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -install( - EXPORT - triton-backend-targets - FILE - TritonBackendTargets.cmake - NAMESPACE - TritonBackend:: - DESTINATION - ${INSTALL_CONFIGDIR} -) - -include(CMakePackageConfigHelpers) -configure_package_config_file( - ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonBackendConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/TritonBackendConfig.cmake - INSTALL_DESTINATION ${INSTALL_CONFIGDIR} -) - -install( - FILES - ${CMAKE_CURRENT_BINARY_DIR}/TritonBackendConfig.cmake - DESTINATION ${INSTALL_CONFIGDIR} -) - -# -# Export from build tree -# -export( - EXPORT triton-backend-targets - FILE ${CMAKE_CURRENT_BINARY_DIR}/TritonBackendTargets.cmake - NAMESPACE TritonBackend:: -) - -export(PACKAGE TritonBackend) diff --git a/3rdparty/backend-r22.12/LICENSE b/3rdparty/backend-r22.12/LICENSE deleted file mode 100644 index 1b34054e482218d517a8b190ee112ee99740f976..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of NVIDIA CORPORATION nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/3rdparty/backend-r22.12/README.md b/3rdparty/backend-r22.12/README.md deleted file mode 100644 index 0ca36f1edf54a41ca26a7db3443323e500ccdf7f..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/README.md +++ /dev/null @@ -1,540 +0,0 @@ - - -[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) - -# Triton Inference Server Backend - -A Triton *backend* is the implementation that executes a model. A -backend can be a wrapper around a deep-learning framework, like -PyTorch, TensorFlow, TensorRT or ONNX Runtime. Or a backend can be -custom C/C++ logic performing any operation (for example, image -pre-processing). - -This repo contains documentation on Triton backends and also source, -scripts and utilities for creating Triton backends. You do not need to -use anything provided in this repo to create a Triton backend but you -will likely find its contents useful. - -## Frequently Asked Questions - -Full documentation is included below but these shortcuts can help you -get started in the right direction. - -### Where can I ask general questions about Triton and Triton backends? - -Be sure to read all the information below as well as the [general -Triton -documentation](https://github.com/triton-inference-server/server#triton-inference-server) -available in the main -[server](https://github.com/triton-inference-server/server) repo. If -you don't find your answer there you can ask questions on the main -Triton [issues -page](https://github.com/triton-inference-server/server/issues). - -### Where can I find all the backends that are available for Triton? - -Anyone can develop a Triton backend, so it isn't possible for us to -know about all available backends. But the Triton project does provide -a set of supported backends that are tested and updated with each -Triton release. - -**TensorRT**: The TensorRT backend is used to execute TensorRT -models. The -[server](https://github.com/triton-inference-server/tensorrt_backend) -repo contains the source for the backend. - -**ONNX Runtime**: The ONNX Runtime backend is used to execute ONNX -models. The -[onnxruntime_backend](https://github.com/triton-inference-server/onnxruntime_backend) -repo contains the documentation and source for the backend. 
- -**TensorFlow**: The TensorFlow backend is used to execute TensorFlow -models in both GraphDef and SavedModel formats. The same backend is -used to execute both TensorFlow 1 and TensorFlow 2 models. The -[tensorflow_backend](https://github.com/triton-inference-server/tensorflow_backend) -repo contains the documentation and source for the backend. - -**PyTorch**: The PyTorch backend is used to execute TorchScript -models. The -[pytorch_backend](https://github.com/triton-inference-server/pytorch_backend) -repo contains the documentation and source for the backend. - -**OpenVINO**: The OpenVINO backend is used to execute -[OpenVINO](https://docs.openvinotoolkit.org/latest/index.html) -models. The -[openvino_backend](https://github.com/triton-inference-server/openvino_backend) -repo contains the documentation and source for the backend. - -**Python**: The Python backend allows you to write your model logic in -Python. For example, you can use this backend to execute pre/post -processing code written in Python, or to execute a PyTorch Python -script directly (instead of first converting it to TorchScript and -then using the PyTorch backend). The -[python_backend](https://github.com/triton-inference-server/python_backend) -repo contains the documentation and source for the backend. - -**DALI**: [DALI](https://github.com/NVIDIA/DALI) is a collection of -highly optimized building blocks and an execution engine that -accelerates the pre-processing of the input data for deep learning -applications. The DALI backend allows you to execute your DALI -pipeline within Triton. The -[dali_backend](https://github.com/triton-inference-server/dali_backend) -repo contains the documentation and source for the backend. - -**FIL**: The FIL ([Forest Inference -Library](https://github.com/rapidsai/cuml/tree/branch-21.10/python/cuml/fil)) -backend is used to execute a variety of tree-based ML models, including -XGBoost models, LightGBM models, Scikit-Learn random forest models, and cuML -random forest models. The -[fil_backend](https://github.com/triton-inference-server/fil_backend) repo -contains the documentation and source for the backend. - -**Important Note!** Not all the above backends are supported on every platform -supported by Triton. Look at the -[Backend-Platform Support Matrix](docs/backend_platform_support_matrix.md) -to learn about the same. - -### How can I develop my own Triton backend? - -First you probably want to ask on the main Triton [issues -page](https://github.com/triton-inference-server/server/issues) to -make sure you are not duplicating a backend that already exists. Then -follow the [tutorial](examples/README.md) to learn how to create your -first simple Triton backend and incrementally improve it to add more -features. You should also read the complete documentation on [Triton -backends](#backends). - -### Can I add (or remove) a backend to an existing Triton installation? - -Yes. See [Backend Shared Library](#backend-shared-library) for general -information about how the shared library implementing a backend is -managed by Triton, and [Triton with Unsupported and Custom -Backends](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/compose.md#triton-with-unsupported-and-custom-backends) -for documentation on how to add your backend to the released Triton -Docker image. For a standard install the globally available backends -are in /opt/tritonserver/backends. - -### What about backends developed using the "legacy custom backend" API. 
- -The legacy custom API is removed from Triton. If you have custom -backends that you developed using this older API you must port them to -the new [Triton Backend API](#triton-backend-api). - -## Backends - -A Triton *backend* is the implementation that executes a model. A -backend can be a wrapper around a deep-learning framework, like -PyTorch, TensorFlow, TensorRT, ONNX Runtime or OpenVINO. A backend can -also implement any functionality you want as long as it adheres to the -[backend API](#triton-backend-api). Triton uses this API to send -requests to the backend for execution and the backend uses the API to -communicate with Triton. - -Every model must be associated with a backend. A model's backend is -specified in the model's configuration using the 'backend' setting. -For using TensorRT backend, the value of this setting should be *tensorrt*. -Similarly, for using PyTorch, ONNX and TensorFlow Backends, the `backend` -field should be set to *pytorch*, *onnxruntime* or *tensorflow* respectively. -For all other backends, 'backend' must be set to the name of the backend. - -### Backend Shared Library - -Each backend must be implemented as a shared library and the name of -the shared library must be *libtriton_\.so*. For -example, if the name of the backend is "mybackend", a model indicates -that it uses the backend by setting the model configuration 'backend' -setting to "mybackend", and Triton looks for *libtriton_mybackend.so* -as the shared library that implements the backend. The -[tutorial](examples/README.md) shows examples of how to build your -backend logic into the appropriate shared library. - -For a model, *M* that specifies backend *B*, Triton searches for the -backend shared library in the following places, in this order: - -* \/M/\/libtriton_B.so - -* \/M/libtriton_B.so - -* \/B/libtriton_B.so - -Where \ is by default -/opt/tritonserver/backends. The --backend-directory flag can be used -to override the default. - -Typically you will install your backend into the global backend -directory. For example, if using Triton Docker images you can follow -the instructions in [Triton with Unsupported and Custom -Backends](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/compose.md#triton-with-unsupported-and-custom-backends). Continuing -the example of a backend names "mybackend", you would install into the -Triton image as: - -``` -/opt/ - tritonserver/ - backends/ - mybackend/ - libtriton_mybackend.so - ... # other files needed by mybackend -``` - -### Triton Backend API - -A Triton backend must implement the C interface defined in -[tritonbackend.h](https://github.com/triton-inference-server/core/tree/main/include/triton/core/tritonbackend.h). The -following abstractions are used by the API. - -#### TRITONBACKEND_Backend - -A TRITONBACKEND_Backend object represents the backend itself. The -same backend object is shared across all models that use the -backend. The associated API, like TRITONBACKEND_BackendName, is used -to get information about the backend and to associate a user-defined -state with the backend. - -A backend can optionally implement TRITONBACKEND_Initialize and -TRITONBACKEND_Finalize to get notification of when the backend object -is created and destroyed (for more information see [backend -lifecycles](#backend-lifecycles)). - -#### TRITONBACKEND_Model - -A TRITONBACKEND_Model object represents a model. Each model loaded by -Triton is associated with a TRITONBACKEND_Model. 
Each model can use -the TRITONBACKEND_ModelBackend API to get the backend object -representing the backend that is used by the model. - -The same model object is shared across all instances of that -model. The associated API, like TRITONBACKEND_ModelName, is used to -get information about the model and to associate a user-defined state -with the model. - -Most backends will implement TRITONBACKEND_ModelInitialize and -TRITONBACKEND_ModelFinalize to initialize the backend for a given -model and to manage the user-defined state associated with the model -(for more information see [backend lifecycles](#backend-lifecycles)). - -The backend must take into account threading concerns when -implementing TRITONBACKEND_ModelInitialize and -TRITONBACKEND_ModelFinalize. Triton will not perform multiple -simultaneous calls to these functions for a given model; however, if a -backend is used by multiple models Triton may simultaneously call the -functions with a different thread for each model. As a result, the -backend must be able to handle multiple simultaneous calls to the -functions. Best practice for backend implementations is to use only -function-local and model-specific user-defined state in these -functions, as is shown in the [tutorial](examples/README.md). - -#### TRITONBACKEND_ModelInstance - -A TRITONBACKEND_ModelInstance object represents a model -*instance*. Triton creates one or more instances of the model based on -the *instance_group* settings specified in the model -configuration. Each of these instances is associated with a -TRITONBACKEND_ModelInstance object. - -The only function that the backend must implement is -TRITONBACKEND_ModelInstanceExecute. The -TRITONBACKEND_ModelInstanceExecute function is called by Triton to -perform inference/computation on a batch of inference requests. Most -backends will also implement TRITONBACKEND_ModelInstanceInitialize -and TRITONBACKEND_ModelInstanceFinalize to initialize the backend for -a given model instance and to manage the user-defined state associated -with the model (for more information see [backend -lifecycles](#backend-lifecycles)). - -The backend must take into account threading concerns when -implementing TRITONBACKEND_ModelInstanceInitialize, -TRITONBACKEND_ModelInstanceFinalize and -TRITONBACKEND_ModelInstanceExecute. Triton will not perform multiple -simultaneous calls to these functions for a given model instance; -however, if a backend is used by a model with multiple instances or by -multiple models Triton may simultaneously call the functions with a -different thread for each model instance. As a result, the backend -must be able to handle multiple simultaneous calls to the -functions. Best practice for backend implementations is to use only -function-local and model-specific user-defined state in these -functions, as is shown in the [tutorial](examples/README.md). - -#### TRITONBACKEND_Request - -A TRITONBACKEND_Request object represents an inference request made -to the model. The backend takes ownership of the request object(s) in -TRITONBACKEND_ModelInstanceExecute and must release each request by -calling TRITONBACKEND_RequestRelease. However, the ownership of request -object is returned back to Triton in case TRITONBACKEND_ModelInstanceExecute -returns an error. See [Inference Requests and Responses](#inference-requests-and-responses) -for more information about request lifecycle. 
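The ownership rule described above is easy to get wrong, so a condensed sketch may help. This is not the tutorial's minimal.cc; the actual response creation is elided, and it only illustrates the release-on-success contract.

```
// Sketch of the request-ownership contract in TRITONBACKEND_ModelInstanceExecute.
#include "triton/core/tritonbackend.h"

extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  (void)instance;  // unused in this sketch

  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];

    // ... create a response, fill its output tensors, and send it here ...

    // The backend owns each request and must hand it back once it is done
    // with it. If this function instead returned an error, ownership of
    // *all* requests would revert to Triton and none of them may be
    // touched (or released) afterwards.
    TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL);
  }

  return nullptr;  // success: every request was released above
}
```

The release flag `TRITONSERVER_REQUEST_RELEASE_ALL` tells Triton that the backend is completely done with the request, including its input buffers.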
- -The Triton Backend API allows the backend to get information about the -request as well as the input and request output tensors of the -request. Each request input is represented by a TRITONBACKEND_Input -object. - -#### TRITONBACKEND_Response - -A TRITONBACKEND_Response object represents a response sent by the -backend for a specific request. The backend uses the response API to -set the name, shape, datatype and tensor values for each output tensor -included in the response. The response can indicate either a failed or -a successful request. See [Inference Requests and -Responses](#inference-requests-and-responses) for more information -about request-response lifecycle. - -### Backend Lifecycles - -A backend must carefully manage the lifecycle of the backend itself, -the models and model instances that use the backend and the inference -requests that execute on the model instances using the backend. - -#### Backend and Model - -Backend, model and model instance initialization is triggered when -Triton loads a model. - -* If the model requires a backend that is not already in use by an - already loaded model, then: - - * Triton [loads the shared library](#backend-shared-library) that - implements the backend required by the model. - - * Triton creates the TRITONBACKEND_Backend object that represents - the backend. - - * Triton calls TRITONBACKEND_Initialize if it is implemented in the - backend shared library. TRITONBACKEND_Initialize should not return - until the backend is completely initialized. If - TRITONBACKEND_Initialize returns an error, Triton will report that - the model failed to load. - -* Triton creates the TRITONBACKEND_Model object that represents the - model. Triton calls TRITONBACKEND_ModelInitialize if it is - implemented in the backend shared library. - TRITONBACKEND_ModelInitialize should not return until the backend - is completely initialized for the model. If - TRITONBACKEND_ModelInitialize returns an error, Triton will show - that the model failed to load. - -* For each model instance specified for the model in the model - configuration: - - * Triton creates the TRITONBACKEND_ModelInstance object that - represents the model instance. - - * Triton calls TRITONBACKEND_ModelInstanceInitialize if it is - implemented in the backend shared library. - TRITONBACKEND_ModelInstanceInitialize should not return until the - backend is completely initialized for the instance. If - TRITONBACKEND_ModelInstanceInitialize returns an error, Triton - will show that the model failed to load. - -Backend, model and model instance finalization is triggered when -Triton unloads a model. - -* For each model instance: - - * Triton calls TRITONBACKEND_ModelInstanceFinalize if it is - implemented in the backend shared library. - TRITONBACKEND_ModelInstanceFinalize should not return until the - backend is completely finalized, including stopping any threads - create for the model instance and freeing any user-defined state - created for the model instance. - - * Triton destroys the TRITONBACKEND_ModelInstance object that - represents the model instance. - -* Triton calls TRITONBACKEND_ModelFinalize if it is implemented in the - backend shared library. TRITONBACKEND_ModelFinalize should not - return until the backend is completely finalized, including stopping - any threads create for the model and freeing any user-defined state - created for the model. - -* Triton destroys the TRITONBACKEND_Model object that represents the - model. 
- -* Even if no other loaded model requires the backend, Triton does not - finalize and unload the backend until the tritonserver process is - exiting. When the tritonserver process exits: - - * Triton calls TRITONBACKEND_Finalize if it is implemented in the - backend shared library. TRITONBACKEND_ModelFinalize should not - return until the backend is completely finalized, including - stopping any threads create for the backend and freeing any - user-defined state created for the backend. - - * Triton destroys the TRITONBACKEND_Backend object that represents - the backend. - -#### Inference Requests and Responses - -Triton calls TRITONBACKEND_ModelInstanceExecute to execute inference -requests on a model instance. Each call to -TRITONBACKEND_ModelInstanceExecute communicates a batch of requests -to execute and the instance of the model that should be used to -execute those requests. The backend should not allow the caller -thread to return from TRITONBACKEND_ModelInstanceExecute until that -instance is ready to handle another set of requests. Typically this -means that the TRITONBACKEND_ModelInstanceExecute function will -create responses and release the requests before returning. However, -in case TRITONBACKEND_ModelInstanceExecute returns an error, the ownership -of requests is transferred back to Triton which will then be responsible -for releasing them. Therefore, in the case where TRITONBACKEND_ModelInstanceExecute -returns an error, the backend must not retain references to the requests -or access them in any way. For more detailed description of request/response -lifetimes, study the documentation of TRITONBACKEND_ModelInstanceExecute in -[tritonbackend.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonbackend.h). - -##### Single Response - -Most backends will create a single response for each request. For that -kind of backend, executing a single inference request requires the -following steps: - -* Create a response for the request using TRITONBACKEND_ResponseNew. - -* For each request input tensor use TRITONBACKEND_InputProperties to - get shape and datatype of the input as well as the buffer(s) - containing the tensor contents. - -* For each output tensor which the request expects to be returned, use - TRITONBACKEND_ResponseOutput to create the output tensor of the - required datatype and shape. Use TRITONBACKEND_OutputBuffer to get a - pointer to the buffer where the tensor's contents should be written. - -* Use the inputs to perform the inference computation that produces - the requested output tensor contents into the appropriate output - buffers. - -* Optionally set parameters in the response. - -* Send the response using TRITONBACKEND_ResponseSend. - -* Release the request using TRITONBACKEND_RequestRelease. - -For a batch of requests the backend should attempt to combine the -execution of the individual requests as much as possible to increase -performance. - -##### Decoupled Responses - -It is also possible for a backend to send multiple responses for a -request or not send any responses for a request. A backend may also -send responses out-of-order relative to the order that the request -batches are executed. Such backends are called *decoupled* backends. -The decoupled backends use one `ResponseFactory` object per request to keep -creating and sending any number of responses for the request. 
For this -kind of backend, executing a single inference request typically requires -the following steps: - -* For each request input tensor use TRITONBACKEND_InputProperties to - get shape and datatype of the input as well as the buffer(s) - containing the tensor contents. - -* Create a `ResponseFactory` object for the request using - TRITONBACKEND_ResponseFactoryNew. - - 1. Create a response from the `ResponseFactory` object using - TRITONBACKEND_ResponseNewFromFactory. As long as you have - `ResponseFactory` object you can continue creating responses. - - 2. For each output tensor which the request expects to be returned, use - TRITONBACKEND_ResponseOutput to create the output tensor of the - required datatype and shape. Use TRITONBACKEND_OutputBuffer to get a - pointer to the buffer where the tensor's contents should be written. - - 3. Use the inputs to perform the inference computation that produces - the requested output tensor contents into the appropriate output - buffers. - - 4. Optionally set parameters in the response. - - 5. Send the response using TRITONBACKEND_ResponseSend. If this is the - last request then use TRITONSERVER_ResponseCompleteFlag with - TRITONBACKEND_ResponseSend. Otherwise continue with Step 1 for - sending next request - -* Release the request using TRITONBACKEND_RequestRelease. - -###### Special Cases - -The decoupled API is powerful and supports various special cases: - -* If the backend should not send any response for the request, - TRITONBACKEND_ResponseFactorySendFlags can be used to send - TRITONSERVER_RESPONSE_COMPLETE_FINAL using the `ResponseFactory`. - -* The model can also send responses out-of-order in which it received - requests. - -* The backend can copy out the contents of the input buffer(s) if - request is to be released before the contents are completely - consumed to generate responses. After copy, the request can be - released anytime before exiting TRITONBACKEND_ModelInstanceExecute. - The copies and `ResponseFactory` object can be passed to a separate - thread in backend. This means main caller thread can exit from - TRITONBACKEND_ModelInstanceExecute and the backend can still continue - generating responses as long as it holds `ResponseFactory` object. - - -The [repeat example](examples/README.md) demonstrates full power of -what can be acheived from decoupled API. - - -Study documentation of these TRTIONBACKEND_* functions in -[tritonbackend.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonbackend.h) -for more details on these APIs. Read -[Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md) -for more details on how to host a decoupled model. - -## Build the Backend Utilities - -The source in this repo builds into a single "backend utilities" -library that is useful when building backends. You don't need to use -these utilities but they will be helpful for most backends. - -Typically you don't need to build this repo directly but instead you -can include it in the build of your backend as is shown in the -CMakeLists.txt files of the [tutorial examples](examples/README.md). - -To build and install in a local directory use the following commands. - -``` -$ mkdir build -$ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install .. -$ make install -``` - -The following required Triton repositories will be pulled and used in -the build. 
By default the "main" branch/tag will be used for each repo -but the listed CMake argument can be used to override. - -* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] -* triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] - -See the [CMakeLists.txt](CMakeLists.txt) file for other build options. diff --git a/3rdparty/backend-r22.12/cmake/TritonBackendConfig.cmake.in b/3rdparty/backend-r22.12/cmake/TritonBackendConfig.cmake.in deleted file mode 100644 index a0fdea4fe593ce4ca5310d9ea05d8ea0cf2f09fa..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/cmake/TritonBackendConfig.cmake.in +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include(CMakeFindDependencyMacro) - -get_filename_component( - TRITONBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH -) - -list(APPEND CMAKE_MODULE_PATH ${TRITONBACKEND_CMAKE_DIR}) - -if(NOT TARGET TritonBackend::triton-backend-utils) - include("${TRITONBACKEND_CMAKE_DIR}/TritonBackendTargets.cmake") -endif() - -set(TRITONBACKEND_LIBRARIES TritonBackend::triton-backend-utils) diff --git a/3rdparty/backend-r22.12/docs/backend_platform_support_matrix.md b/3rdparty/backend-r22.12/docs/backend_platform_support_matrix.md deleted file mode 100644 index 58341c172d0d054d59c8c6c8d9e8f4c6cacc3c54..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/docs/backend_platform_support_matrix.md +++ /dev/null @@ -1,99 +0,0 @@ - - -# Backend-Platform Support Matrix - -Even though Triton supports inference across various platforms such as -cloud, data center, edge and embedded devices on NVIDIA GPUs, x86 and -ARM CPU, or AWS Inferentia, it does so by relying on the backends. -Note that not all Triton backends support every platform. The purpose -of this document is to go over what all compute platforms are supported -by each of these Triton backends. -GPU in this document refers to Nvidia GPU. 
See
-[GPU, Driver, and CUDA Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
-to learn more about supported GPUs.
-
-## Ubuntu 20.04
-
-The table below describes target device(s) supported for inference by
-each backend on different platforms.
-
-| Backend | x86 | ARM-SBSA |
-| ------------ | --------- | ------------- |
-| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU | :heavy_check_mark: GPU <br/> :x: CPU |
-| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| TensorFlow | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| PyTorch | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| OpenVINO | :x: GPU <br/> :heavy_check_mark: CPU | :x: GPU <br/> :x: CPU |
-| Python[^1] | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| DALI | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU[^2] <br/> :heavy_check_mark: CPU[^2] |
-| FIL | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | Unsupported |
-
-
-
-## Windows 10
-
-Only TensorRT and ONNX Runtime backends are supported on Windows.
-
-| Backend | x86 | ARM-SBSA |
-| ------------ | --------- | ------------- |
-| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU | :heavy_check_mark: GPU <br/> :x: CPU |
-| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-
-
-## Jetson JetPack
-
-The following backends are currently supported on Jetson JetPack:
-
-| Backend | Jetson |
-| ------------ | --------- |
-| TensorRT | :heavy_check_mark: GPU <br/> :x: CPU |
-| ONNX Runtime | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| TensorFlow | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| PyTorch | :heavy_check_mark: GPU <br/> :heavy_check_mark: CPU |
-| Python[^1] | :x: GPU <br/>
:heavy_check_mark: CPU | - - -Look at the [Triton Inference Server Support for Jetson and JetPack](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/jetson.md). - - -## AWS Inferentia - -Currently, inference on AWS Inferentia is only supported via -[python backend](https://github.com/triton-inference-server/python_backend#running-with-inferentia) -where the deployed python script invokes AWS Neuron SDK. - - -[^1]: The supported devices for Python Backend are mentioned with -respect to Triton. The python script running in Python Backend can -be used to execute inference on any hardware if there are available -python APIs to do so. AWS inferentia is one such example. Triton -core is largely unaware of the fact that inference will run on -Inferentia. - -[^2]: In case of ARM-SBSA, some operations are not fully supported. diff --git a/3rdparty/backend-r22.12/examples/README.md b/3rdparty/backend-r22.12/examples/README.md deleted file mode 100644 index e0ddc4cb70fe2b723daa1529ce97ce3f0aa7e7b3..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/README.md +++ /dev/null @@ -1,460 +0,0 @@ - - -[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) - -# Triton Example Backends - -To learn how to create a Triton backend, and to see a best-practices -baseline onto which you can add your own backend log, follow the -[Tutorial](#tutorial). - -Triton also provides a couple of example backends that demonstrate -specific aspects of the backend API not covered by the -[Tutorial](#tutorial). - -* The -[*repeat*](https://github.com/triton-inference-server/repeat_backend) -backend shows a more advanced example of how a backend can produce -multiple responses per request. - -* The -[*stateful*](https://github.com/triton-inference-server/stateful_backend) -backend shows an example of how a backend can manage model state -tensors on the server-side for the [sequence -batcher](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#sequence-batcher) -to avoid transferring state tensors between client and server. Triton -also implements [Implicit State -Management](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/architecture.md#implicit-state-management) -which allows backends to behave in a stateless manner and leave the -state management to Triton. - -## Tutorial - -The [Triton Backend API](../README.md#triton-backend-api) exposes a -large number of features. The backend utilities and classes provide -many functions commonly used when creating a backend. But to create a -functional backend it is not necessary to use most of the backend API -or utilities. The tutorial starts with an implementation that shows a -*minimal* backend and then adds on recommended and optional -enhancements. The tutorial implementations follow best practices for -Triton backends and so can be used as templates for your own backend. - -### *Minimal* Triton Backend - -The source code for the *minimal* backend is contained in -[minimal.cc](backends/minimal/src/minimal.cc). The source code -contains extensive documentation describing the operation of the -backend and the use of the [Triton Backend -API](../README.md#triton-backend-api) and the backend -utilities. 
Before reading the source code, make sure you understand -the concepts associated with Triton backend abstractions -[TRITONBACKEND_Backend](../README.md#tritonbackend_backend), -[TRITONBACKEND_Model](../README.md#tritonbackend_model), and -[TRITONBACKEND_ModelInstance](../README.md#tritonbackend_modelinstance). - -The *minimal* backend does not do any interesting operation, it simply -copies a single input tensor to a single output tensor, but it does -demonstrate the basic organization required for a Triton backend. - -The *minimal* backend is complete but for clarity leaves out some -important aspects of writing a full-featured backend that are -described in [*Recommended* Triton -Backend](#recommended-triton-backend). When creating your own backend -use the [*Recommended* Triton Backend](#recommended-triton-backend) as -a starting point. - -#### Building the *Minimal* Backend - -[backends/minimal/CMakeLists.txt](backends/minimal/CMakeLists.txt) -shows the recommended build and install script for a Triton -backend. To build the *minimal* backend and install in a local directory -use the following commands. - -``` -$ cd backends/minimal -$ mkdir build -$ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install .. -$ make install -``` - -The following required Triton repositories will be pulled and used in -the build. By default the "main" branch/tag will be used for each repo -but the listed CMake argument can be used to override. - -* triton-inference-server/backend: -DTRITON_BACKEND_REPO_TAG=[tag] -* triton-inference-server/core: -DTRITON_CORE_REPO_TAG=[tag] -* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] - -If you are building on a release branch (or on a development branch -that is based off of a release branch), then you must set these cmake -arguments to point to that release branch as well. For example, if you -are building the r21.10 identity_backend branch then you need to use -the following additional cmake flags: - -``` --DTRITON_BACKEND_REPO_TAG=r21.10 --DTRITON_CORE_REPO_TAG=r21.10 --DTRITON_COMMON_REPO_TAG=r21.10 -``` - -After building the install directory will contain a backends/minimal -directory that contains the *minimal* backend. Instructions for adding -this backend to the Triton server are described in [Backend Shared -Library](../README.md#backend-shared-library). - -#### Running Triton with the *Minimal* Backend - -After adding the *minimal* backend to the Triton server as described -in [Backend Shared Library](../README.md#backend-shared-library), you -can run Triton and have it load the models in -[model_repos/minimal_models](model_repos/minimal_models). Assuming you -have created a *tritonserver* Docker image by adding the *minimal* -backend to Triton, the following command will run Triton: - -``` -$ docker run --rm -it --net=host -v/path/to/model_repos/minimal_models:/models tritonserver --model-repository=/models -``` - -The console output will show similar to the following indicating that -the *batching* and *nonbatching* models from the minimal_models -repository have loaded correctly. Note that the model repository has -two models that both use the *minimal* backend. A backend can support -any number of diffent models. 
- -``` -I1215 23:46:00.250284 68 server.cc:589] -+-------------+---------+--------+ -| Model | Version | Status | -+-------------+---------+--------+ -| batching | 1 | READY | -| nonbatching | 1 | READY | -+-------------+---------+--------+ -``` - -The models are identical except that the *batching* model enabled the -[dynamic -batcher](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#dynamic-batcher) -and supports batch sizes up to 8. Note that the *batching* model sets -the [batch -delay](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#delayed-batching) -to 5 seconds so that the example client described below can -demonstrate how the *minimal* backend receives a batch of requests. - -#### Testing the *Minimal* Backend - -The [clients](clients) directory holds example clients. The -[minimal_client](clients/minimal_client) Python script demonstrates -sending a couple of inference requests to the *minimal* backend. With -Triton running as described in [Running Triton with the *Minimal* -Backend](#running-triton-with-the-minimal-backend), execute the -client: - -``` -$ clients/minimal_client -``` - -The minimal_client first sends a single request to nonbatching -model. From the output you can see that the input value is returned in -the output. - -``` -========= -Sending request to nonbatching model: IN0 = [1 2 3 4] -Response: {'model_name': 'nonbatching', 'model_version': '1', 'outputs': [{'name': 'OUT0', 'datatype': 'INT32', 'shape': [4], 'parameters': {'binary_data_size': 16}}]} -OUT0 = [1 2 3 4] -``` - -In the Triton console output you can see the log message printed by -the *minimal* backend that indicates that it received a batch -containing the single request. - -``` -I1221 18:14:12.964836 86 minimal.cc:348] model nonbatching: requests in batch 1 -I1221 18:14:12.964857 86 minimal.cc:356] batched IN0 value: [ 1, 2, 3, 4 ] -``` - -The minimal_client next sends 2 requests at the same time to the -batching model. Triton will dynamically batch those requests into a -single batch and send that single batch to the *minimal* backend. - -``` -========= -Sending request to batching model: IN0 = [[10 11 12 13]] -Sending request to batching model: IN0 = [[20 21 22 23]] -Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUT0', 'datatype': 'INT32', 'shape': [1, 4], 'parameters': {'binary_data_size': 16}}]} -OUT0 = [[10 11 12 13]] -Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUT0', 'datatype': 'INT32', 'shape': [1, 4], 'parameters': {'binary_data_size': 16}}]} -OUT0 = [[20 21 22 23]] -``` - -In the Triton console output you can see the log message indicating -that the *minimal* backend received a batch containing both requests. - -``` -I1221 18:14:17.965982 86 minimal.cc:348] model batching: requests in batch 2 -I1221 18:14:17.966035 86 minimal.cc:356] batched IN0 value: [ 10, 11, 12, 13, 20, 21, 22, 23 ] -``` - -### *Recommended* Triton Backend - -The source code for the *recommended* backend is contained in -[recommended.cc](backends/recommended/src/recommended.cc). The source -code contains extensive documentation describing the operation of the -backend and the use of the [Triton Backend -API](../README.md#triton-backend-api) and the backend -utilities. 
Before reading the source code, make sure you understand -the concepts associated with Triton backend abstractions -[TRITONBACKEND_Backend](../README.md#tritonbackend_backend), -[TRITONBACKEND_Model](../README.md#tritonbackend_model), and -[TRITONBACKEND_ModelInstance](../README.md#tritonbackend_modelinstance). - -The *recommended* backend improves the [*minimal* -backend](#minimal-triton-backend) to include the following features -which should be present in any robust backend implementation: - -* Enhances the backend to support models with input/output tensors - that have datatypes other than INT32. - -* Enhances the backend to support models with input/output tensors - that have any shape. - -* Uses the Triton backend metric APIs to record statistics about - requests executing in the backend. These metrics can then we queried - using the Triton - [metrics](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md) - and - [statistics](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md) - APIs. - -* Additional error checking to ensure that the backend's version is - compatible with Triton and that each model's configuration is - compatible with the backend. - -As with the *minimal* backend, the *recommended* backend just returns -the input tensor value in the output tensor. Because of the additions -described above, the *recommended* backend can serve as a starting -point for your backend. - -#### Building the *Recommended* Backend - -[backends/recommended/CMakeLists.txt](backends/recommended/CMakeLists.txt) -shows the recommended build and install script for a Triton -backend. Building and installing is the same as decribed in [Building -the *Minimal* Backend](#building-the-minimal-backend). - -#### Running Triton with the *Recommended* Backend - -After adding the *recommended* backend to the Triton server as -described in [Backend Shared -Library](../README.md#backend-shared-library), you can run Triton and -have it load the models in -[model_repos/recommended_models](model_repos/recommended_models). Assuming -you have created a *tritonserver* Docker image by adding the -*recommended* backend to Triton, the following command will run -Triton: - -``` -$ docker run --rm -it --net=host -v/path/to/model_repos/recommended_models:/models tritonserver --model-repository=/models -``` - -The console output will show similar to the following indicating that -the *batching* model from the recommended_models repository have -loaded correctly. - -``` -I1215 23:46:00.250284 68 server.cc:589] -+-------------+---------+--------+ -| Model | Version | Status | -+-------------+---------+--------+ -| batching | 1 | READY | -+-------------+---------+--------+ -``` - -#### Testing the *Recommended* Backend - -The [clients](clients) directory holds example clients. The -[recommended_client](clients/recommended_client) Python script -demonstrates sending a couple of inference requests to the -*recommended* backend. With Triton running as described in [Running -Triton with the *Recommended* -Backend](#running-triton-with-the-recommended-backend), execute the -client: - -``` -$ clients/recommended_client -``` - -The recommended_client next sends 2 requests at the same time to the -batching model, similar to what was done above with the *minimal* -backend. Triton will dynamically batch those requests into a single -batch and send that single batch to the *recommended* backend. 
In this -model, batching is supported, the datatype is FP32 and the tensor -shape is [ -1, 4, 4 ]. - -``` -========= -Sending request to batching model: input = [[[1. 1.1 1.2 1.3] - [2. 2.1 2.2 2.3] - [3. 3.1 3.2 3.3] - [4. 4.1 4.2 4.3]]] -Sending request to batching model: input = [[[10. 10.1 10.2 10.3] - [20. 20.1 20.2 20.3] - [30. 30.1 30.2 30.3] - [40. 40.1 40.2 40.3]]] -Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUTPUT', 'datatype': 'FP32', 'shape': [1, 4, 4], 'parameters': {'binary_data_size': 64}}]} -OUTPUT = [[[1. 1.1 1.2 1.3] - [2. 2.1 2.2 2.3] - [3. 3.1 3.2 3.3] - [4. 4.1 4.2 4.3]]] -Response: {'model_name': 'batching', 'model_version': '1', 'outputs': [{'name': 'OUTPUT', 'datatype': 'FP32', 'shape': [1, 4, 4], 'parameters': {'binary_data_size': 64}}]} -OUTPUT = [[[10. 10.1 10.2 10.3] - [20. 20.1 20.2 20.3] - [30. 30.1 30.2 30.3] - [40. 40.1 40.2 40.3]]] -``` - -In the Triton console output you can see the log message indicating -that the *recommended* backend received a batch containing both -requests. - -``` -I1221 18:30:52.223226 127 recommended.cc:604] model batching: requests in batch 2 -I1221 18:30:52.223313 127 recommended.cc:613] batched INPUT value: [ 1.000000, 1.100000, 1.200000, 1.300000, 2.000000, 2.100000, 2.200000, 2.300000, 3.000000, 3.100000, 3.200000, 3.300000, 4.000000, 4.100000, 4.200000, 4.300000, 10.000000, 10.100000, 10.200000, 10.300000, 20.000000, 20.100000, 20.200001, 20.299999, 30.000000, 30.100000, 30.200001, 30.299999, 40.000000, 40.099998, 40.200001, 40.299999 ] -``` - -Because the *recommended* backend can support models that have -input/output tensors with any datatype and shape, you can edit the -model configuration and the client to experiment with these options. - -To see the metrics collected for these two inference requests, use the following command to access Triton's metrics endpoint. - -``` -$ curl localhost:8002/metrics -``` - -The output will be metric values in Prometheus data format. The -[metrics -documentation](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/metrics.md) -gives a description of these metric values. - -``` -# HELP nv_inference_request_success Number of successful inference requests, all batch sizes -# TYPE nv_inference_request_success counter -nv_inference_request_success{model="batching",version="1"} 2.000000 -# HELP nv_inference_request_failure Number of failed inference requests, all batch sizes -# TYPE nv_inference_request_failure counter -nv_inference_request_failure{model="batching",version="1"} 0.000000 -# HELP nv_inference_count Number of inferences performed -# TYPE nv_inference_count counter -nv_inference_count{model="batching",version="1"} 2.000000 -# HELP nv_inference_exec_count Number of model executions performed -# TYPE nv_inference_exec_count counter -nv_inference_exec_count{model="batching",version="1"} 1.000000 -... -``` - -You can also see the collected statistics using the [statistics -endpoint](https://github.com/triton-inference-server/server/blob/main/docs/protocol/extension_statistics.md). 
- -``` -$ curl localhost:8000/v2/models/batching/stats -{"model_stats":[{"name":"batching","version":"1","last_inference":1640111452223,"inference_count":2,"execution_count":1,"inference_stats":{"success":{"count":2,"ns":9997025869},"fail":{"count":0,"ns":0},"queue":{"count":2,"ns":9996491319},"compute_input":{"count":2,"ns":95288},"compute_infer":{"count":2,"ns":232202},"compute_output":{"count":2,"ns":195850}},"batch_stats":[{"batch_size":2,"compute_input":{"count":1,"ns":47644},"compute_infer":{"count":1,"ns":116101},"compute_output":{"count":1,"ns":97925}}]}]} -``` - -### *BLS* Triton Backend - -Please see the [doucumentation](backends/bls/README.md) of *BLS* Backend. - -### Enhancements - -This section describes several optional features that you can add to -enhance the capabilities of your backend. - -#### Automatically Model Configuration Generation - -[Automatic model configuration -generation](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/model_configuration.md#auto-generated-model-configuration) -is enabled by the backend implementing the appropriate logic (for -example, in a function called AutoCompleteConfig) during -TRITONBACKEND_ModelInitialize. For the *recommended* backend you would -add a call to AutoCompleteConfig in the ModelState constructor just -before the call to ValidateModelConfig. The AutoCompleteConfig -function can update the model configuration with input tensor, output -tensor, and max-batch-size configuration; and then update the -configuration using TRITONBACKEND_ModelSetConfig. Examples can be -found in [ONNXRuntime -backend](https://github.com/triton-inference-server/onnxruntime_backend), -[TensorFlow -backend](https://github.com/triton-inference-server/tensorflow_backend) -and other backends. - -#### Add Key-Value Parameters to a Response - -A backend can add a key-value pair to a response any time after the -response is created and before it is sent. The parameter key must be a -string and the parameter value can be a string, integer or -boolean. The following example shows the TRITONBACKEND API used to set -response parameters. Error checking code is not shown to improve -clarity. - -``` -TRITONBACKEND_ResponseSetStringParameter(response, "param0", "an example string parameter"); -TRITONBACKEND_ResponseSetIntParameter(responses[r], "param1", 42); -TRITONBACKEND_ResponseSetBoolParameter(responses[r], "param2", false); -``` - -#### Access Model Artifacts in the Model Repository - -A backend can access any of the files in a model's area of the model -registry. These files are typically needed during -TRITONBACKEND_ModelInitialize but can be accessed at other times as -well. The TRITONBACKEND_ModelRepository API gives the location of the -model's repository. For example, the following code can be run during -TRITONBACKEND_ModelInitialize to write the location to the log. - -``` -// Can get location of the model artifacts. Normally we would need -// to check the artifact type to make sure it was something we can -// handle... but we are just going to log the location so we don't -// need the check. We would use the location if we wanted to load -// something from the model's repo. 
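// At the time of writing, a model served from a local model repository
// reports TRITONBACKEND_ARTIFACT_FILESYSTEM here, in which case the
// returned location is a plain filesystem path.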
-TRITONBACKEND_ArtifactType artifact_type; -const char* clocation; -RETURN_IF_ERROR( - TRITONBACKEND_ModelRepository(model, &artifact_type, &clocation)); -LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("Repository location: ") + clocation).c_str()); -``` - -The framework backends (for example, TensorRT, ONNXRuntime, -TensorFlow, PyTorch) read the actual model file from the model -repository using this API. See those backends for examples of how it -can be used. diff --git a/3rdparty/backend-r22.12/examples/backends/bls/README.md b/3rdparty/backend-r22.12/examples/backends/bls/README.md deleted file mode 100644 index eae8390cad8bfdb0e4876ae460b30bedc5c0cfd1..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/README.md +++ /dev/null @@ -1,135 +0,0 @@ - - -# *BLS* Triton Backend - -The [*BLS*](../bls) backend demonstrates using in-process C-API to -execute inferences within the backend. This backend serves as an example to -backend developers for implementing their own custom pipeline in C++. -For Python use cases, please refer to -[Business Logic Scripting](https://github.com/triton-inference-server/python_backend/blob/main/README.md#business-logic-scripting) -section in Python backend. - -The source code for the *bls* backend is contained in -[src](./src). - -* [backend.cc](./src/backend.cc) contains the main backend -implementation. The content of this file is not BLS specific. It only includes -the required Triton backend functions that is standard for any backend -implementation. The BLS logic is set off in the -`TRITONBACKEND_ModelInstanceExecute` with lines `bls_executor.Execute(requests[r], &responses[r]);`. - -* [bls.h](./src/bls.h) is where the BLS (class `BLSExecutor`) of -this example is located. You can refer to this file to see how to interact with -Triton in-process C-API to build the custom execution pipeline. - -* [bls_utils.h](./src/bls_utils.h) is where all the utilities that -are not BLS dependent are located. - -The source code contains extensive documentation describing the operation of -the backend and the use of the -[Triton Backend API](../../../README.md#triton-backend-api) and the -[Triton Server API](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#in-process-triton-server-api). -Before reading the source code, make sure you understand -the concepts associated with Triton backend abstractions -[TRITONBACKEND_Backend](../../../README.md#tritonbackend_backend), -[TRITONBACKEND_Model](../../../README.md#tritonbackend_model), and -[TRITONBACKEND_ModelInstance](../../../README.md#tritonbackend_modelinstance). - -The *bls* backend will send two requests on the 'addsub_python' and 'addsub_tf' -models. After the inference requests are completed, this backend will extract -OUTPUT0 from the 'addsub_python' and OUTPUT1 from the 'addsub_tf' model to -construct the final inference response object using these tensors. - -There are some self-imposed limitations that were made for the simplicity of -this example: -1. This backend does not support batching. -2. This backend does not support decoupled models. -3. This backend does not support GPU tensors. -4. The model configuraion should be strictly set as the comments described in -[backend.cc](./src/backend.cc). - -You can implement your custom backend that is not limited to the limitations -mentioned above. 
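
Condensed, the control flow that `BLSExecutor::Execute` (in [src](./src)) applies to each request looks roughly like the sketch below. It is a simplified excerpt rather than a drop-in implementation: error handling, the model-readiness check, and the decoupled-transaction-policy check present in the real source are omitted here.

```
// For every request received by the *bls* backend, send one internal
// request to each of the two composing models.
std::vector<std::string> model_names = {"addsub_python", "addsub_tf"};
std::vector<std::future<TRITONSERVER_InferenceResponse*>> futures(2);

for (size_t i = 0; i < model_names.size(); i++) {
  TRITONSERVER_InferenceRequest* irequest = nullptr;

  // Build an internal request for one composing model, copying the
  // request id, correlation id, flags, inputs and requested outputs
  // from the request this backend received.
  PrepareInferenceRequest(bls_request, &irequest, model_names[i]);
  PrepareInferenceInput(bls_request, irequest);
  PrepareInferenceOutput(bls_request, irequest);

  // Hand the request to the in-process Triton server; the resulting
  // TRITONSERVER_InferenceResponse* is delivered through the future.
  model_executor_.AsyncExecute(irequest, &futures[i]);
}

// Wait for both responses, then copy OUTPUT0 (from 'addsub_python')
// and OUTPUT1 (from 'addsub_tf') into this backend's final response.
ConstructFinalResponse(response, std::move(futures));
```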
- -## Building the *BLS* Backend - -[backends/bls/CMakeLists.txt](CMakeLists.txt) -shows the recommended build and install script for a Triton -backend. Building and installing is the same as decribed in [Building -the *Minimal* Backend](../../README.md#building-the-minimal-backend). - -## Running Triton with the *BLS* Backend - -After adding the *bls* backend to the Triton server as -described in [Backend Shared -Library](../../../README.md#backend-shared-library), you can run Triton and -have it load the models in -[model_repos/bls_models](../../model_repos/bls_models). Assuming you have created a -*tritonserver* Docker image by adding the *bls* backend to Triton, the -following command will run Triton: - -``` -$ docker run --rm -it --net=host -v/path/to/model_repos/bls_models:/models tritonserver --model-repository=/models -``` - -The console output will show similar to the following indicating that -the *bls_fp32*, *addsub_python* and *addsub_tf* models from the bls_models repository have -loaded correctly. - -``` -I0616 09:34:47.767433 19214 server.cc:629] -+---------------+---------+--------+ -| Model | Version | Status | -+---------------+---------+--------+ -| addsub_python | 1 | READY | -| addsub_tf | 1 | READY | -| bls_fp32 | 1 | READY | -+---------------+---------+--------+ -``` - -## Testing the *BLS* Backend - -The [clients](../../clients) directory holds example clients. The -[bls_client](../../clients/bls_client) Python script demonstrates sending an -inference requests to the *bls* backend. With Triton running as -described in [Running Triton with the *BLS* Backend](#running-triton-with-the-bls-backend), -execute the client: - -``` -$ clients/bls_client -``` - -You should see an output similar to the output below: - -``` -INPUT0 ([0.42935285 0.51512766 0.43625894 ... 0.6670954 0.17747518 0.7976901 ]) + INPUT1 ([6.7752063e-01 2.4223252e-01 6.7743927e-01 ... 4.1531715e-01 2.5451833e-01 7.9097062e-01]) = OUTPUT0 ([1.1068735 0.75736016 1.1136982 ... 1.0824126 0.4319935 1.5886607 ]) -INPUT0 ([0.42935285 0.51512766 0.43625894 ... 0.6670954 0.17747518 0.7976901 ]) - INPUT1 ([6.7752063e-01 2.4223252e-01 6.7743927e-01 ... 4.1531715e-01 2.5451833e-01 7.9097062e-01]) = OUTPUT1 ([-0.24816778 0.27289516 -0.24118033 ... 0.25177827 -0.07704315 0.00671947]) - -PASS -``` diff --git a/3rdparty/backend-r22.12/examples/backends/bls/cmake/TritonBLSBackendConfig.cmake.in b/3rdparty/backend-r22.12/examples/backends/bls/cmake/TritonBLSBackendConfig.cmake.in deleted file mode 100644 index dd41ae7aeb9cb34277f7b49bf15e00d5fd1fc007..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/cmake/TritonBLSBackendConfig.cmake.in +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include(CMakeFindDependencyMacro) - -get_filename_component( - TRITONBLSBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH -) - -list(APPEND CMAKE_MODULE_PATH ${TRITONBLSBACKEND_CMAKE_DIR}) - -if(NOT TARGET TritonBLSBackend::triton-bls-backend) - include("${TRITONBLSBACKEND_CMAKE_DIR}/TritonBLSBackendTargets.cmake") -endif() - -set(TRITONBLSBACKEND_LIBRARIES TritonBLSBackend::triton-bls-backend) diff --git a/3rdparty/backend-r22.12/examples/backends/bls/src/backend.cc b/3rdparty/backend-r22.12/examples/backends/bls/src/backend.cc deleted file mode 100644 index 66f1c17a13506f53a96294a0ff0f11aead5d7d95..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/src/backend.cc +++ /dev/null @@ -1,526 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "bls.h" -#include "triton/backend/backend_model.h" -#include "triton/backend/backend_model_instance.h" - -// -// Backend that demonstrates using in-process C-API to execute inferences -// within the backend. -// -// Two particular models, 'addsub_python' and 'addsub_tf', must be loaded on -// the server for a successful inference execution on this backend. 
-// -// The model configuration should be set as follows in order to be in line with -// the 'addsub_python' and 'addsub_tf' models. This backend does not support -// batching. These limitations are only for this specific backend. You can -// implement your custom BLS backend with less limitations. -// -// Model Configuration: -// - Input 'INPUT0' must have shape [16] and datatype must be TYPE_FP32. -// -// - Input 'INPUT1' must have shape [16] and datatype must be TYPE_FP32. -// -// - For each response, output 'OUTPUT0' must have shape [16] and -// datatype TYPE_FP32. -// -// - For each response, output 'OUTPUT1' must have shape [16] and -// datatype TYPE_FP32. -// -// This backend will send two requests on the 'addsub_python' and 'addsub_tf' -// models. After the inference requests are completed, this backend -// will extract OUTPUT0 from the 'addsub_python' and OUTPUT1 from the -// 'addsub_tf' model to construct the final inference response object using -// these tensors. - -namespace triton { namespace backend { namespace bls { - -// -// ModelState -// -// State associated with a model that is using this backend. An object -// of this class is created and associated with each -// TRITONBACKEND_Model. -// -class ModelState : public BackendModel { - public: - static TRITONSERVER_Error* Create( - TRITONBACKEND_Model* triton_model, ModelState** state); - virtual ~ModelState() = default; - - // Validate that model configuration is supported by this backend. - TRITONSERVER_Error* ValidateModelConfig(); - - private: - ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model) {} -}; - -TRITONSERVER_Error* -ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state) -{ - try { - *state = new ModelState(triton_model); - } - catch (const BackendModelException& ex) { - RETURN_ERROR_IF_TRUE( - ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL, - std::string("unexpected nullptr in BackendModelException")); - RETURN_IF_ERROR(ex.err_); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -ModelState::ValidateModelConfig() -{ - // We have the json DOM for the model configuration... - common::TritonJson::WriteBuffer buffer; - RETURN_IF_ERROR(model_config_.PrettyWrite(&buffer)); - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("model configuration:\n") + buffer.Contents()).c_str()); - - // max_batch_size must be 0 because this backend does not support - // batching - int64_t max_batch_size; - RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &max_batch_size)); - RETURN_ERROR_IF_FALSE( - max_batch_size == 0, TRITONSERVER_ERROR_INVALID_ARG, - std::string("bls backend only supports models with max_batch_size == 0")); - - common::TritonJson::Value inputs, outputs; - RETURN_IF_ERROR(model_config_.MemberAsArray("input", &inputs)); - RETURN_IF_ERROR(model_config_.MemberAsArray("output", &outputs)); - - // There must be 2 inputs and 2 outputs. - RETURN_ERROR_IF_FALSE( - inputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected 2 inputs, got ") + - std::to_string(inputs.ArraySize())); - RETURN_ERROR_IF_FALSE( - outputs.ArraySize() == 2, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected 2 outputs, got ") + - std::to_string(outputs.ArraySize())); - - // Here we rely on the model configuation listing the inputs and - // outputs in a specific order, which we shouldn't really require... 
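  // A more robust implementation would iterate over the 'input' and
  // 'output' arrays and match entries by their 'name' field instead of
  // relying on their position.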
- common::TritonJson::Value input0, input1, output0, output1; - RETURN_IF_ERROR(inputs.IndexAsObject(0, &input0)); - RETURN_IF_ERROR(inputs.IndexAsObject(1, &input1)); - RETURN_IF_ERROR(outputs.IndexAsObject(0, &output0)); - RETURN_IF_ERROR(outputs.IndexAsObject(1, &output1)); - - // Check tensor names - std::string in0_name, in1_name, out0_name, out1_name; - RETURN_IF_ERROR(input0.MemberAsString("name", &in0_name)); - RETURN_IF_ERROR(input1.MemberAsString("name", &in1_name)); - RETURN_IF_ERROR(output0.MemberAsString("name", &out0_name)); - RETURN_IF_ERROR(output1.MemberAsString("name", &out1_name)); - - RETURN_ERROR_IF_FALSE( - in0_name == "INPUT0", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected first input tensor name to be INPUT0, got ") + - in0_name); - RETURN_ERROR_IF_FALSE( - in1_name == "INPUT1", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected second input tensor name to be INPUT1, got ") + - in1_name); - RETURN_ERROR_IF_FALSE( - out0_name == "OUTPUT0", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected first output tensor name to be OUTPUT0, got ") + - out0_name); - RETURN_ERROR_IF_FALSE( - out1_name == "OUTPUT1", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected second output tensor name to be OUTPUT1, got ") + - out1_name); - - // Check shapes - std::vector in0_shape, in1_shape, out0_shape, out1_shape; - RETURN_IF_ERROR(backend::ParseShape(input0, "dims", &in0_shape)); - RETURN_IF_ERROR(backend::ParseShape(input1, "dims", &in1_shape)); - RETURN_IF_ERROR(backend::ParseShape(output0, "dims", &out0_shape)); - RETURN_IF_ERROR(backend::ParseShape(output1, "dims", &out1_shape)); - - RETURN_ERROR_IF_FALSE( - in0_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected INPUT0 shape to have one dimension, got ") + - backend::ShapeToString(in0_shape)); - RETURN_ERROR_IF_FALSE( - in1_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected INPUT1 shape to have one dimension, got ") + - backend::ShapeToString(in1_shape)); - RETURN_ERROR_IF_FALSE( - out0_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected OUTPUT0 shape to have one dimension, got ") + - backend::ShapeToString(out0_shape)); - RETURN_ERROR_IF_FALSE( - out1_shape.size() == 1, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected OUTPUT1 shape to have one dimension, got ") + - backend::ShapeToString(out1_shape)); - - // Check datatypes - std::string in0_dtype, in1_dtype, out0_dtype, out1_dtype; - RETURN_IF_ERROR(input0.MemberAsString("data_type", &in0_dtype)); - RETURN_IF_ERROR(input1.MemberAsString("data_type", &in1_dtype)); - RETURN_IF_ERROR(output0.MemberAsString("data_type", &out0_dtype)); - RETURN_IF_ERROR(output1.MemberAsString("data_type", &out1_dtype)); - - RETURN_ERROR_IF_FALSE( - in0_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected INPUT0 datatype to be TYPE_FP32, got ") + - in0_dtype); - RETURN_ERROR_IF_FALSE( - in1_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected INPUT1 datatype to be TYPE_FP32, got ") + - in1_dtype); - RETURN_ERROR_IF_FALSE( - out0_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected OUTPUT0 datatype to be TYPE_FP32, got ") + - out0_dtype); - RETURN_ERROR_IF_FALSE( - out1_dtype == "TYPE_FP32", TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected OUTPUT1 datatype to be TYPE_FP32, got ") + - out1_dtype); - - return nullptr; // success -} - -// -// ModelInstanceState -// -// State associated with a model instance. 
An object of this class is -// created and associated with each TRITONBACKEND_ModelInstance. -// -class ModelInstanceState : public BackendModelInstance { - public: - static TRITONSERVER_Error* Create( - ModelState* model_state, - TRITONBACKEND_ModelInstance* triton_model_instance, - ModelInstanceState** state); - virtual ~ModelInstanceState() = default; - - void ProcessRequests( - TRITONBACKEND_Request** requests, const uint32_t request_count); - - private: - ModelInstanceState( - ModelState* model_state, - TRITONBACKEND_ModelInstance* triton_model_instance) - : BackendModelInstance(model_state, triton_model_instance) - { - } -}; - -TRITONSERVER_Error* -ModelInstanceState::Create( - ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance, - ModelInstanceState** state) -{ - try { - *state = new ModelInstanceState(model_state, triton_model_instance); - } - catch (const BackendModelInstanceException& ex) { - RETURN_ERROR_IF_TRUE( - ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL, - std::string("unexpected nullptr in BackendModelInstanceException")); - RETURN_IF_ERROR(ex.err_); - } - - return nullptr; // success -} - -void -ModelInstanceState::ProcessRequests( - TRITONBACKEND_Request** requests, const uint32_t request_count) -{ - uint64_t exec_start_ns = 0; - SET_TIMESTAMP(exec_start_ns); - - for (size_t i = 0; i < request_count; i++) { - // If we get a nullptr request then something is badly wrong. Fail - // and release all requests. - if (requests[i] == nullptr) { - RequestsRespondWithError( - requests, request_count, - TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "null request given to BLS backend for '" + Name() + "'") - .c_str())); - return; - } - } - - // At this point we accept ownership of 'requests', which means that - // even if something goes wrong we must still return success from - // this function. If something does go wrong in processing a - // particular request then we send an error response just for the - // specific request. - std::vector responses; - responses.reserve(request_count); - - for (size_t i = 0; i < request_count; i++) { - TRITONBACKEND_Response* response; - auto err = TRITONBACKEND_ResponseNew(&response, requests[i]); - if (err == nullptr) { - responses.emplace_back(response); - } else { - responses.emplace_back(nullptr); - LOG_MESSAGE(TRITONSERVER_LOG_ERROR, "Fail to create response"); - TRITONSERVER_ErrorDelete(err); - } - } - - ModelState* model_state = reinterpret_cast(Model()); - - // The way we collect these batch timestamps is not entirely - // accurate. Normally, in a performant backend you would execute all - // the requests at the same time, and so there would be a single - // compute-start / compute-end time-range. But here we execute each - // request separately so there is no single range. As a result we - // just show the entire execute time as being the compute time as - // well. - uint64_t compute_start_ns = 0; - SET_TIMESTAMP(compute_start_ns); - - // Create a BLSExecutor object. To separate from standard backend - // implementation, the BLS logic is placed inside class BLSExecutor. - BLSExecutor bls_executor(model_state->TritonServer()); - - for (size_t r = 0; r < request_count; r++) { - bls_executor.Execute(requests[r], &responses[r]); - } - - uint64_t compute_end_ns = 0; - SET_TIMESTAMP(compute_end_ns); - - uint64_t exec_end_ns = 0; - SET_TIMESTAMP(exec_end_ns); - - // Send all the responses that haven't already been sent because of - // an earlier error. 
Note that the responses are not set to nullptr - // here as we need that indication below to determine if the request - // we successful or not. - for (auto& response : responses) { - if (response != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr), - "failed to send BLS backend response"); - } - } - - // Report statistics for each request. - for (uint32_t r = 0; r < request_count; ++r) { - auto& request = requests[r]; - LOG_IF_ERROR( - TRITONBACKEND_ModelInstanceReportStatistics( - TritonModelInstance(), request, - (responses[r] != nullptr) /* success */, exec_start_ns, - compute_start_ns, compute_end_ns, exec_end_ns), - "failed reporting request statistics"); - - LOG_IF_ERROR( - TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL), - "failed releasing request"); - } - - // Report the entire batch statistics. - LOG_IF_ERROR( - TRITONBACKEND_ModelInstanceReportBatchStatistics( - TritonModelInstance(), 1 /*total_batch_size*/, exec_start_ns, - compute_start_ns, compute_end_ns, exec_end_ns), - "failed reporting batch request statistics"); - - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("TRITONBACKEND_ModelExecute: model ") + Name() + - " released " + std::to_string(request_count) + " requests") - .c_str()); -} - -///////////// - -extern "C" { - -// Implementing TRITONBACKEND_ModelInitialize is optional. The backend -// should initialize any state that is intended to be shared across -// all instances of the model. -TRITONSERVER_Error* -TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) -{ - const char* cname; - RETURN_IF_ERROR(TRITONBACKEND_ModelName(model, &cname)); - std::string name(cname); - - uint64_t version; - RETURN_IF_ERROR(TRITONBACKEND_ModelVersion(model, &version)); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("TRITONBACKEND_ModelInitialize: ") + name + " (version " + - std::to_string(version) + ")") - .c_str()); - - // With each model we create a ModelState object and associate it - // with the TRITONBACKEND_Model. - ModelState* model_state; - RETURN_IF_ERROR(ModelState::Create(model, &model_state)); - RETURN_IF_ERROR( - TRITONBACKEND_ModelSetState(model, reinterpret_cast(model_state))); - - // One of the primary things to do in ModelInitialize is to examine - // the model configuration to ensure that it is something that this - // backend can support. If not, returning an error from this - // function will prevent the model from loading. - RETURN_IF_ERROR(model_state->ValidateModelConfig()); - - return nullptr; // success -} - -// Implementing TRITONBACKEND_ModelFinalize is optional unless state -// is set using TRITONBACKEND_ModelSetState. The backend must free -// this state and perform any other cleanup. -TRITONSERVER_Error* -TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) -{ - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate)); - ModelState* model_state = reinterpret_cast(vstate); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, "TRITONBACKEND_ModelFinalize: delete model state"); - - delete model_state; - - return nullptr; // success -} - -// Implementing TRITONBACKEND_ModelInstanceInitialize is optional. The -// backend should initialize any state that is required for a model -// instance. 
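// In this backend the per-instance state is a ModelInstanceState object:
// it is attached to the instance with TRITONBACKEND_ModelInstanceSetState
// below and deleted again in TRITONBACKEND_ModelInstanceFinalize.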
-TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) -{ - const char* cname; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceName(instance, &cname)); - std::string name(cname); - - int32_t device_id; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceDeviceId(instance, &device_id)); - TRITONSERVER_InstanceGroupKind kind; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceKind(instance, &kind)); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("TRITONBACKEND_ModelInstanceInitialize: ") + name + " (" + - TRITONSERVER_InstanceGroupKindString(kind) + " device " + - std::to_string(device_id) + ")") - .c_str()); - - // The instance can access the corresponding model as well... here - // we get the model and from that get the model's state. - TRITONBACKEND_Model* model; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model)); - - void* vmodelstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate)); - ModelState* model_state = reinterpret_cast(vmodelstate); - - // With each instance we create a ModelInstanceState object and - // associate it with the TRITONBACKEND_ModelInstance. - ModelInstanceState* instance_state; - RETURN_IF_ERROR( - ModelInstanceState::Create(model_state, instance, &instance_state)); - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState( - instance, reinterpret_cast(instance_state))); - - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("TRITONBACKEND_ModelInstanceInitialize: instance " - "initialization successful ") + - name + " (device " + std::to_string(device_id) + ")") - .c_str()); - - return nullptr; // success -} - -// Implementing TRITONBACKEND_ModelInstanceFinalize is optional unless -// state is set using TRITONBACKEND_ModelInstanceSetState. The backend -// must free this state and perform any other cleanup. -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) -{ - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate)); - ModelInstanceState* instance_state = - reinterpret_cast(vstate); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - "TRITONBACKEND_ModelInstanceFinalize: delete instance state"); - - delete instance_state; - - return nullptr; // success -} - -// Implementing TRITONBACKEND_ModelInstanceExecute is required. -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceExecute( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, - const uint32_t request_count) -{ - // Triton will not call this function simultaneously for the same - // 'instance'. But since this backend could be used by multiple - // instances from multiple models the implementation needs to handle - // multiple calls to this function at the same time (with different - // 'instance' objects). Suggested practice for this is to use only - // function-local and model-instance-specific state (obtained from - // 'instance'), which is what we do here. 
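  // Fetch the ModelInstanceState attached in
  // TRITONBACKEND_ModelInstanceInitialize and hand the whole batch to
  // ProcessRequests, which from this point owns the requests and is
  // responsible for responding to and releasing each of them.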
- ModelInstanceState* instance_state; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState( - instance, reinterpret_cast(&instance_state))); - ModelState* model_state = - reinterpret_cast(instance_state->Model()); - - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("model ") + model_state->Name() + ", instance " + - instance_state->Name() + ", executing " + std::to_string(request_count) + - " requests") - .c_str()); - - instance_state->ProcessRequests(requests, request_count); - - return nullptr; // success -} - -} // extern "C" - -}}} // namespace triton::backend::bls diff --git a/3rdparty/backend-r22.12/examples/backends/bls/src/bls.cc b/3rdparty/backend-r22.12/examples/backends/bls/src/bls.cc deleted file mode 100644 index a892a41498487c771b0e8c58254c6eaed88caf8a..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/src/bls.cc +++ /dev/null @@ -1,291 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "bls.h" - -namespace triton { namespace backend { namespace bls { - -BLSExecutor::BLSExecutor(TRITONSERVER_Server* server) - : server_(server), model_executor_(server) -{ -} - -TRITONSERVER_Error* -BLSExecutor::PrepareInferenceRequest( - TRITONBACKEND_Request* bls_request, - TRITONSERVER_InferenceRequest** irequest, const std::string model_name) -{ - // Get request_id, correlation_id, and flags from the current request - // for preparing a new inference request that we will send to 'addsub_python' - // or 'addsub_tf' model later. - const char* request_id; - uint64_t correlation_id; - uint32_t flags; - RETURN_IF_ERROR(TRITONBACKEND_RequestId(bls_request, &request_id)); - RETURN_IF_ERROR( - TRITONBACKEND_RequestCorrelationId(bls_request, &correlation_id)); - RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(bls_request, &flags)); - - // Create an inference request object. The inference request object - // is where we set the name of the model we want to use for - // inference and the input tensors. 
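  // A model_version of -1 lets Triton choose which version of the model
  // to execute according to the model's version policy.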
- RETURN_IF_ERROR(TRITONSERVER_InferenceRequestNew( - irequest, server_, model_name.c_str(), -1 /* model_version */)); - // Set request_id, correlation_id, and flags for the new request. - RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetId(*irequest, request_id)); - RETURN_IF_ERROR( - TRITONSERVER_InferenceRequestSetCorrelationId(*irequest, correlation_id)); - RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetFlags(*irequest, flags)); - RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetReleaseCallback( - *irequest, InferRequestComplete, nullptr /* request_release_userp */)); - - return nullptr; // success -} - -TRITONSERVER_Error* -BLSExecutor::PrepareInferenceInput( - TRITONBACKEND_Request* bls_request, TRITONSERVER_InferenceRequest* irequest) -{ - // Get the properties of the two inputs from the current request. - // Then, add the two input tensors and append the input data to the new - // request. - uint32_t input_count; - RETURN_IF_ERROR(TRITONBACKEND_RequestInputCount(bls_request, &input_count)); - - TRITONBACKEND_Input* input; - const char* name; - TRITONSERVER_DataType datatype; - const int64_t* shape; - uint32_t dims_count; - size_t data_byte_size; - TRITONSERVER_MemoryType data_memory_type; - int64_t data_memory_id; - const char* data_buffer; - - for (size_t count = 0; count < input_count; count++) { - RETURN_IF_ERROR(TRITONBACKEND_RequestInputByIndex( - bls_request, count /* index */, &input)); - RETURN_IF_ERROR(TRITONBACKEND_InputProperties( - input, &name, &datatype, &shape, &dims_count, nullptr, nullptr)); - RETURN_IF_ERROR(TRITONBACKEND_InputBuffer( - input, 0 /* idx */, reinterpret_cast(&data_buffer), - &data_byte_size, &data_memory_type, &data_memory_id)); - RETURN_IF_ERROR(TRITONSERVER_InferenceRequestAddInput( - irequest, name, datatype, shape, dims_count)); - RETURN_IF_ERROR(TRITONSERVER_InferenceRequestAppendInputData( - irequest, name, &data_buffer[0], data_byte_size, data_memory_type, - data_memory_id)); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -BLSExecutor::PrepareInferenceOutput( - TRITONBACKEND_Request* bls_request, TRITONSERVER_InferenceRequest* irequest) -{ - // Indicate the output tensors to be calculated and returned - // for the inference request. - uint32_t output_count; - RETURN_IF_ERROR(TRITONBACKEND_RequestOutputCount(bls_request, &output_count)); - const char* output_name; - for (size_t count = 0; count < output_count; count++) { - RETURN_IF_ERROR(TRITONBACKEND_RequestOutputName( - bls_request, count /* index */, &output_name)); - RETURN_IF_ERROR( - TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output_name)); - } - - return nullptr; // success -} - -void -BLSExecutor::Execute( - TRITONBACKEND_Request* bls_request, TRITONBACKEND_Response** response) -{ - // The names of the models that we will send internal requests on. - std::vector model_names = {"addsub_python", "addsub_tf"}; - - // Check if both models are valid before executing request. - try { - for (size_t i = 0; i < 2; i++) { - // Check if the model is ready. - bool is_ready = false; - THROW_IF_TRITON_ERROR(TRITONSERVER_ServerModelIsReady( - server_, model_names[i].c_str(), -1 /* model_version */, &is_ready)); - if (!is_ready) { - throw BLSBackendException( - (std::string("Failed to execute the inference request. Model '") + - model_names[i].c_str() + "' is not ready.") - .c_str()); - } - // For simplicity, decoupled API is not supported in this BLS backend. You - // can implement your own backend that supports decoupled models. 
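      // Query the model's transaction policy: a decoupled model may produce
      // zero or many responses per request, which the one-future-per-request
      // logic used below cannot handle, so such models are rejected.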
- uint32_t txn_flags; - THROW_IF_TRITON_ERROR(TRITONSERVER_ServerModelTransactionProperties( - server_, model_names[i].c_str(), -1 /* model_version */, &txn_flags, - nullptr /* voidp */)); - if ((txn_flags & TRITONSERVER_TXN_DECOUPLED) != 0) { - throw BLSBackendException( - std::string("Model '") + model_names[i].c_str() + - "' is using the decoupled. This BLS Backend doesn't support models " - "using the decoupled transaction policy."); - } - } - } - catch (const BLSBackendException& bls_exception) { - LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what()); - RESPOND_AND_SET_NULL_IF_ERROR( - response, - TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "Failed to send inference requests")); - return; - } - - // Prepare std::future for each model. Since this BLS backend - // can handle requests in parallel, we will send all the inference - // requests first and then retrieve them later. - std::vector> futures(2); - - // The inference request object for sending internal requests. - TRITONSERVER_InferenceRequest* irequest = nullptr; - - // For each inference request, the backend sends two requests on the - // 'addsub_python' and 'addsub_tf' models. - try { - for (size_t icount = 0; icount < 2; icount++) { - // Initialize the inference request with required information. - THROW_IF_TRITON_ERROR( - PrepareInferenceRequest(bls_request, &irequest, model_names[icount])); - THROW_IF_TRITON_ERROR(PrepareInferenceInput(bls_request, irequest)); - THROW_IF_TRITON_ERROR(PrepareInferenceOutput(bls_request, irequest)); - - // Execute inference request. - THROW_IF_TRITON_ERROR( - model_executor_.AsyncExecute(irequest, &futures[icount])); - } - } - catch (const BLSBackendException& bls_exception) { - LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what()); - LOG_IF_ERROR( - TRITONSERVER_InferenceRequestDelete(irequest), - "Failed to delete inference request."); - RESPOND_AND_SET_NULL_IF_ERROR( - response, - TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "Failed to send inference requests")); - return; - } - - // If both internal requests are sent successfully, retrieve the output from - // each request and construct the final response. - ConstructFinalResponse(response, std::move(futures)); -} - -void -BLSExecutor::ConstructFinalResponse( - TRITONBACKEND_Response** response, - std::vector> futures) -{ - // Prepare two TRITONSERVER_InferenceResponse* objects for 'addsub_python' and - // 'addsub_tf' repectively. - std::vector completed_responses = {nullptr, - nullptr}; - - const char* output_name; - TRITONSERVER_DataType output_datatype; - const int64_t* output_shape; - uint64_t dims_count; - size_t output_byte_size; - TRITONSERVER_MemoryType output_memory_type; - int64_t output_memory_id; - const void* output_base; - void* userp; - for (size_t icount = 0; icount < 2; icount++) { - // Retrieve the corresponding TRITONSERVER_InferenceResponse object from - // 'futures'. The InferResponseComplete function sets the std::promise - // so that this thread will block until the response is returned. 
- completed_responses[icount] = futures[icount].get(); - try { - THROW_IF_TRITON_ERROR( - TRITONSERVER_InferenceResponseError(completed_responses[icount])); - } - catch (const BLSBackendException& bls_exception) { - LOG_MESSAGE(TRITONSERVER_LOG_ERROR, bls_exception.what()); - - if (completed_responses[icount] != nullptr) { - LOG_IF_ERROR( - TRITONSERVER_InferenceResponseDelete(completed_responses[icount]), - "Failed to delete inference response."); - } - return; - } - // Retrieve outputs from 'completed_responses'. - // Extract OUTPUT0 from the 'addsub_python' and OUTPUT1 from the - // 'addsub_tf' model to form the final inference response object. - // Get all the information about the output tensor. - RESPOND_AND_SET_NULL_IF_ERROR( - response, - TRITONSERVER_InferenceResponseOutput( - completed_responses[icount], icount, &output_name, &output_datatype, - &output_shape, &dims_count, &output_base, &output_byte_size, - &output_memory_type, &output_memory_id, &userp)); - - // Create an output tensor in the final response with - // the information retrieved above. - TRITONBACKEND_Output* output; - RESPOND_AND_SET_NULL_IF_ERROR( - response, TRITONBACKEND_ResponseOutput( - *response, &output, output_name, output_datatype, - output_shape, dims_count)); - - // Get a buffer that holds the tensor data for the output. - // We request a buffer in CPU memory but we have to handle any returned - // type. If we get back a buffer in GPU memory we just fail the request. - void* output_buffer; - output_memory_type = TRITONSERVER_MEMORY_CPU; - RESPOND_AND_SET_NULL_IF_ERROR( - response, TRITONBACKEND_OutputBuffer( - output, &output_buffer, output_byte_size, - &output_memory_type, &output_memory_id)); - if (output_memory_type == TRITONSERVER_MEMORY_GPU) { - RESPOND_AND_SET_NULL_IF_ERROR( - response, TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "failed to create output buffer in CPU memory")); - } - - // Fill the BLS output buffer with output data returned by internal - // requests. - memcpy(output_buffer, output_base, output_byte_size); - - LOG_IF_ERROR( - TRITONSERVER_InferenceResponseDelete(completed_responses[icount]), - "Failed to delete inference response."); - } -} - -}}} // namespace triton::backend::bls diff --git a/3rdparty/backend-r22.12/examples/backends/bls/src/bls.h b/3rdparty/backend-r22.12/examples/backends/bls/src/bls.h deleted file mode 100644 index a0a3a1ed0d448402c04d73270034a3aa54120691..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/src/bls.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include "bls_utils.h" -#include "triton/backend/backend_common.h" -#include "triton/core/tritonbackend.h" -#include "triton/core/tritonserver.h" - -namespace triton { namespace backend { namespace bls { - -// -// BLSExecutor -// -// Includes the custom BLS logic for this backend. -// This class shows how to utilize Triton in-process C-API to build the -// execution pipeline. -// -class BLSExecutor { - public: - BLSExecutor(TRITONSERVER_Server* server); - - // Prepares the inference request that will be used internally. - TRITONSERVER_Error* PrepareInferenceRequest( - TRITONBACKEND_Request* bls_request, - TRITONSERVER_InferenceRequest** irequest, const std::string model_name); - - // Prepares the input for the internal inference request. - TRITONSERVER_Error* PrepareInferenceInput( - TRITONBACKEND_Request* bls_request, - TRITONSERVER_InferenceRequest* irequest); - - // Prepares the output for the internal inference request. - TRITONSERVER_Error* PrepareInferenceOutput( - TRITONBACKEND_Request* bls_request, - TRITONSERVER_InferenceRequest* irequest); - - // Performs the whole BLS pipeline. - void Execute( - TRITONBACKEND_Request* bls_request, TRITONBACKEND_Response** response); - - // Constructs the final response. - void ConstructFinalResponse( - TRITONBACKEND_Response** response, - std::vector> futures); - - private: - // The server object that encapsulates all the functionality of the Triton - // server and allows access to the Triton server API. - TRITONSERVER_Server* server_; - - // The ModelExecutor object for executing inference request on a model. - ModelExecutor model_executor_; -}; - -}}} // namespace triton::backend::bls diff --git a/3rdparty/backend-r22.12/examples/backends/bls/src/bls_utils.cc b/3rdparty/backend-r22.12/examples/backends/bls/src/bls_utils.cc deleted file mode 100644 index d935275309fdb7dfaf168882e13ec866ac868379..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/src/bls_utils.cc +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "bls_utils.h" - -namespace triton { namespace backend { namespace bls { - -TRITONSERVER_Error* -CPUAllocator( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id) -{ - // For simplicity, this backend example always uses CPU memory regardless of - // the preferred memory type. You can make the actual memory type and id that - // we allocate be the same as preferred memory type. You can also provide a - // customized allocator to support different preferred_memory_type, and reuse - // memory buffer when possible. - *actual_memory_type = TRITONSERVER_MEMORY_CPU; - *actual_memory_type_id = preferred_memory_type_id; - - // If 'byte_size' is zero just return 'buffer' == nullptr, we don't - // need to do any other book-keeping. - if (byte_size == 0) { - *buffer = nullptr; - *buffer_userp = nullptr; - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, ("allocated " + std::to_string(byte_size) + - " bytes for result tensor " + tensor_name) - .c_str()); - } else { - void* allocated_ptr = nullptr; - *actual_memory_type = TRITONSERVER_MEMORY_CPU; - allocated_ptr = malloc(byte_size); - - // Pass the tensor name with buffer_userp so we can show it when - // releasing the buffer. 
-    if (allocated_ptr != nullptr) {
-      *buffer = allocated_ptr;
-      *buffer_userp = new std::string(tensor_name);
-      LOG_MESSAGE(
-          TRITONSERVER_LOG_VERBOSE,
-          ("allocated " + std::to_string(byte_size) + " bytes in " +
-           TRITONSERVER_MemoryTypeString(*actual_memory_type) +
-           " for result tensor " + tensor_name)
-              .c_str());
-    }
-  }
-
-  return nullptr;  // Success
-}
-
-TRITONSERVER_Error*
-ResponseRelease(
-    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
-    size_t byte_size, TRITONSERVER_MemoryType memory_type,
-    int64_t memory_type_id)
-{
-  std::string* name = nullptr;
-  if (buffer_userp != nullptr) {
-    name = reinterpret_cast<std::string*>(buffer_userp);
-  } else {
-    name = new std::string("");
-  }
-
-  std::stringstream ss;
-  ss << buffer;
-  std::string buffer_str = ss.str();
-
-  LOG_MESSAGE(
-      TRITONSERVER_LOG_VERBOSE,
-      ("Releasing buffer " + buffer_str + " of size " +
-       std::to_string(byte_size) + " in " +
-       TRITONSERVER_MemoryTypeString(memory_type) + " for result '" + *name)
-          .c_str());
-
-  switch (memory_type) {
-    case TRITONSERVER_MEMORY_CPU:
-      free(buffer);
-      break;
-    default:
-      LOG_MESSAGE(
-          TRITONSERVER_LOG_ERROR,
-          std::string(
-              "error: unexpected buffer allocated in CUDA managed memory")
-              .c_str());
-      break;
-  }
-
-  delete name;
-
-  return nullptr;  // Success
-}
-
-void
-InferRequestComplete(
-    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
-{
-  if (request != nullptr) {
-    LOG_IF_ERROR(
-        TRITONSERVER_InferenceRequestDelete(request),
-        "Failed to delete inference request.");
-  }
-}
-
-void
-InferResponseComplete(
-    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
-{
-  // The following logic only works for non-decoupled models as for decoupled
-  // models it may send multiple responses for a request or not send any
-  // responses for a request. Need to modify this function if the model is using
-  // decoupled API.
-  if (response != nullptr) {
-    // Send 'response' to the future.
-    std::promise<TRITONSERVER_InferenceResponse*>* p =
-        reinterpret_cast<std::promise<TRITONSERVER_InferenceResponse*>*>(userp);
-    p->set_value(response);
-    delete p;
-  }
-}
-
-ModelExecutor::ModelExecutor(TRITONSERVER_Server* server) : server_(server)
-{
-  // When triton needs a buffer to hold an output tensor, it will ask
-  // us to provide the buffer. In this way we can have any buffer
-  // management and sharing strategy that we want. To communicate to
-  // triton the functions that we want it to call to perform the
-  // allocations, we create a "response allocator" object. We pass
-  // this response allocator object to triton when requesting
-  // inference. We can reuse this response allocator object for any
-  // number of inference requests.
-  allocator_ = nullptr;
-  THROW_IF_TRITON_ERROR(TRITONSERVER_ResponseAllocatorNew(
-      &allocator_, CPUAllocator, ResponseRelease, nullptr /* start_fn */));
-}
-
-TRITONSERVER_Error*
-ModelExecutor::AsyncExecute(
-    TRITONSERVER_InferenceRequest* irequest,
-    std::future<TRITONSERVER_InferenceResponse*>* future)
-{
-  // Perform inference by calling TRITONSERVER_ServerInferAsync. This
-  // call is asynchronous and therefore returns immediately. The
-  // completion of the inference and delivery of the response is done
-  // by triton by calling the "response complete" callback functions
-  // (InferResponseComplete in this case).
-  auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
-  *future = p->get_future();
-
-  RETURN_IF_ERROR(TRITONSERVER_InferenceRequestSetResponseCallback(
-      irequest, allocator_, nullptr /* response_allocator_userp */,
-      InferResponseComplete, reinterpret_cast<void*>(p)));
-
-  RETURN_IF_ERROR(
-      TRITONSERVER_ServerInferAsync(server_, irequest, nullptr /* trace */));
-
-  return nullptr;  // success
-}
-
-}}}  // namespace triton::backend::bls
diff --git a/3rdparty/backend-r22.12/examples/backends/bls/src/bls_utils.h b/3rdparty/backend-r22.12/examples/backends/bls/src/bls_utils.h
deleted file mode 100644
index e5482e0adfbe3e2095419839ba32b1182f323370..0000000000000000000000000000000000000000
--- a/3rdparty/backend-r22.12/examples/backends/bls/src/bls_utils.h
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <future>
-#include <string>
-#include "triton/backend/backend_common.h"
-#include "triton/core/tritonbackend.h"
-#include "triton/core/tritonserver.h"
-
-namespace triton { namespace backend { namespace bls {
-
-#define THROW_IF_TRITON_ERROR(X)                                         \
-  do {                                                                   \
-    TRITONSERVER_Error* tie_err__ = (X);                                 \
-    if (tie_err__ != nullptr) {                                          \
-      throw BLSBackendException(TRITONSERVER_ErrorMessage(tie_err__));   \
-    }                                                                    \
-  } while (false)
-
-//
-// BLSBackendException
-//
-// Exception thrown if error occurs in BLSBackend.
-//
-struct BLSBackendException : std::exception {
-  BLSBackendException(const std::string& message) : message_(message) {}
-
-  const char* what() const throw() { return message_.c_str(); }
-
-  std::string message_;
-};
-
-// Performs the allocations of output tensors.
-TRITONSERVER_Error* CPUAllocator(
-    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
-    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
-    int64_t preferred_memory_type_id, void* userp, void** buffer,
-    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
-    int64_t* actual_memory_type_id);
-
-// Callback functions for server inference.
-TRITONSERVER_Error* ResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); -void InferRequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp); -void InferResponseComplete( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, - void* userp); - -// -// ModelExecutor -// -// Execute inference request on a model. -// -class ModelExecutor { - public: - ModelExecutor(TRITONSERVER_Server* server); - - // Performs async inference request. - TRITONSERVER_Error* AsyncExecute( - TRITONSERVER_InferenceRequest* irequest, - std::future* future); - - private: - // The server object that encapsulates all the functionality of the Triton - // server and allows access to the Triton server API. - TRITONSERVER_Server* server_; - - // The allocator object that will be used for allocating output tensors. - TRITONSERVER_ResponseAllocator* allocator_; -}; - -}}} // namespace triton::backend::bls diff --git a/3rdparty/backend-r22.12/examples/backends/bls/src/libtriton_bls.ldscript b/3rdparty/backend-r22.12/examples/backends/bls/src/libtriton_bls.ldscript deleted file mode 100644 index b7c0c7556550578b5ef1cc722cf7357602bdcfc5..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/bls/src/libtriton_bls.ldscript +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -{ - global: - TRITONBACKEND_*; - local: *; -}; diff --git a/3rdparty/backend-r22.12/examples/backends/minimal/cmake/TutorialMinimalBackendConfig.cmake.in b/3rdparty/backend-r22.12/examples/backends/minimal/cmake/TutorialMinimalBackendConfig.cmake.in deleted file mode 100644 index 2e408d0306e0b74611c53033eb255f58ef38a528..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/minimal/cmake/TutorialMinimalBackendConfig.cmake.in +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include(CMakeFindDependencyMacro) - -get_filename_component( - TUTORIALMINIMALBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH -) - -list(APPEND CMAKE_MODULE_PATH ${TUTORIALMINIMALBACKEND_CMAKE_DIR}) - -if(NOT TARGET TutorialMinimalBackend::triton-minimal-backend) - include("${TUTORIALMINIMALBACKEND_CMAKE_DIR}/TutorialMinimalBackendTargets.cmake") -endif() - -set(TUTORIALMINIMALBACKEND_LIBRARIES TutorialMinimalBackend::triton-minimal-backend) diff --git a/3rdparty/backend-r22.12/examples/backends/minimal/src/libtriton_minimal.ldscript b/3rdparty/backend-r22.12/examples/backends/minimal/src/libtriton_minimal.ldscript deleted file mode 100644 index 748714d16fd3a4d028e71216f33da78ff4e6dbe9..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/minimal/src/libtriton_minimal.ldscript +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -{ - global: - TRITONBACKEND_*; - local: *; -}; diff --git a/3rdparty/backend-r22.12/examples/backends/minimal/src/minimal.cc b/3rdparty/backend-r22.12/examples/backends/minimal/src/minimal.cc deleted file mode 100644 index 6e29e3c78fde11c38c48bd8b2e7eb54985b967cd..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/minimal/src/minimal.cc +++ /dev/null @@ -1,434 +0,0 @@ -// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/backend/backend_common.h" -#include "triton/backend/backend_input_collector.h" -#include "triton/backend/backend_model.h" -#include "triton/backend/backend_model_instance.h" -#include "triton/backend/backend_output_responder.h" -#include "triton/core/tritonbackend.h" - -namespace triton { namespace backend { namespace minimal { - -// -// Minimal backend that demonstrates the TRITONBACKEND API. This -// backend works for any model that has 1 input called "IN0" with -// INT32 datatype and shape [ 4 ] and 1 output called "OUT0" with -// INT32 datatype and shape [ 4 ]. The backend supports both batching -// and non-batching models. -// -// For each batch of requests, the backend returns the input tensor -// value in the output tensor. -// - -///////////// - -// -// ModelState -// -// State associated with a model that is using this backend. An object -// of this class is created and associated with each -// TRITONBACKEND_Model. 
ModelState is derived from BackendModel class -// provided in the backend utilities that provides many common -// functions. -// -class ModelState : public BackendModel { - public: - static TRITONSERVER_Error* Create( - TRITONBACKEND_Model* triton_model, ModelState** state); - virtual ~ModelState() = default; - - private: - ModelState(TRITONBACKEND_Model* triton_model) : BackendModel(triton_model) {} -}; - -TRITONSERVER_Error* -ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state) -{ - try { - *state = new ModelState(triton_model); - } - catch (const BackendModelException& ex) { - RETURN_ERROR_IF_TRUE( - ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL, - std::string("unexpected nullptr in BackendModelException")); - RETURN_IF_ERROR(ex.err_); - } - - return nullptr; // success -} - -extern "C" { - -// Triton calls TRITONBACKEND_ModelInitialize when a model is loaded -// to allow the backend to create any state associated with the model, -// and to also examine the model configuration to determine if the -// configuration is suitable for the backend. Any errors reported by -// this function will prevent the model from loading. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) -{ - // Create a ModelState object and associate it with the - // TRITONBACKEND_Model. If anything goes wrong with initialization - // of the model state then an error is returned and Triton will fail - // to load the model. - ModelState* model_state; - RETURN_IF_ERROR(ModelState::Create(model, &model_state)); - RETURN_IF_ERROR( - TRITONBACKEND_ModelSetState(model, reinterpret_cast(model_state))); - - return nullptr; // success -} - -// Triton calls TRITONBACKEND_ModelFinalize when a model is no longer -// needed. The backend should cleanup any state associated with the -// model. This function will not be called until all model instances -// of the model have been finalized. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) -{ - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate)); - ModelState* model_state = reinterpret_cast(vstate); - delete model_state; - - return nullptr; // success -} - -} // extern "C" - -///////////// - -// -// ModelInstanceState -// -// State associated with a model instance. An object of this class is -// created and associated with each -// TRITONBACKEND_ModelInstance. ModelInstanceState is derived from -// BackendModelInstance class provided in the backend utilities that -// provides many common functions. -// -class ModelInstanceState : public BackendModelInstance { - public: - static TRITONSERVER_Error* Create( - ModelState* model_state, - TRITONBACKEND_ModelInstance* triton_model_instance, - ModelInstanceState** state); - virtual ~ModelInstanceState() = default; - - // Get the state of the model that corresponds to this instance. 
- ModelState* StateForModel() const { return model_state_; } - - private: - ModelInstanceState( - ModelState* model_state, - TRITONBACKEND_ModelInstance* triton_model_instance) - : BackendModelInstance(model_state, triton_model_instance), - model_state_(model_state) - { - } - - ModelState* model_state_; -}; - -TRITONSERVER_Error* -ModelInstanceState::Create( - ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance, - ModelInstanceState** state) -{ - try { - *state = new ModelInstanceState(model_state, triton_model_instance); - } - catch (const BackendModelInstanceException& ex) { - RETURN_ERROR_IF_TRUE( - ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL, - std::string("unexpected nullptr in BackendModelInstanceException")); - RETURN_IF_ERROR(ex.err_); - } - - return nullptr; // success -} - -extern "C" { - -// Triton calls TRITONBACKEND_ModelInstanceInitialize when a model -// instance is created to allow the backend to initialize any state -// associated with the instance. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) -{ - // Get the model state associated with this instance's model. - TRITONBACKEND_Model* model; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model)); - - void* vmodelstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate)); - ModelState* model_state = reinterpret_cast(vmodelstate); - - // Create a ModelInstanceState object and associate it with the - // TRITONBACKEND_ModelInstance. - ModelInstanceState* instance_state; - RETURN_IF_ERROR( - ModelInstanceState::Create(model_state, instance, &instance_state)); - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState( - instance, reinterpret_cast(instance_state))); - - return nullptr; // success -} - -// Triton calls TRITONBACKEND_ModelInstanceFinalize when a model -// instance is no longer needed. The backend should cleanup any state -// associated with the model instance. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) -{ - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate)); - ModelInstanceState* instance_state = - reinterpret_cast(vstate); - delete instance_state; - - return nullptr; // success -} - -} // extern "C" - -///////////// - -extern "C" { - -// When Triton calls TRITONBACKEND_ModelInstanceExecute it is required -// that a backend create a response for each request in the batch. A -// response may be the output tensors required for that request or may -// be an error that is returned in the response. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceExecute( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, - const uint32_t request_count) -{ - // Triton will not call this function simultaneously for the same - // 'instance'. But since this backend could be used by multiple - // instances from multiple models the implementation needs to handle - // multiple calls to this function at the same time (with different - // 'instance' objects). Best practice for a high-performance - // implementation is to avoid introducing mutex/lock and instead use - // only function-local and model-instance-specific state. 
- ModelInstanceState* instance_state; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState( - instance, reinterpret_cast(&instance_state))); - ModelState* model_state = instance_state->StateForModel(); - - // 'responses' is initialized as a parallel array to 'requests', - // with one TRITONBACKEND_Response object for each - // TRITONBACKEND_Request object. If something goes wrong while - // creating these response objects, the backend simply returns an - // error from TRITONBACKEND_ModelInstanceExecute, indicating to - // Triton that this backend did not create or send any responses and - // so it is up to Triton to create and send an appropriate error - // response for each request. RETURN_IF_ERROR is one of several - // useful macros for error handling that can be found in - // backend_common.h. - - std::vector responses; - responses.reserve(request_count); - for (uint32_t r = 0; r < request_count; ++r) { - TRITONBACKEND_Request* request = requests[r]; - TRITONBACKEND_Response* response; - RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request)); - responses.push_back(response); - } - - // At this point, the backend takes ownership of 'requests', which - // means that it is responsible for sending a response for every - // request. From here, even if something goes wrong in processing, - // the backend must return 'nullptr' from this function to indicate - // success. Any errors and failures must be communicated via the - // response objects. - // - // To simplify error handling, the backend utilities manage - // 'responses' in a specific way and it is recommended that backends - // follow this same pattern. When an error is detected in the - // processing of a request, an appropriate error response is sent - // and the corresponding TRITONBACKEND_Response object within - // 'responses' is set to nullptr to indicate that the - // request/response has already been handled and no futher processing - // should be performed for that request. Even if all responses fail, - // the backend still allows execution to flow to the end of the - // function. RESPOND_AND_SET_NULL_IF_ERROR, and - // RESPOND_ALL_AND_SET_NULL_IF_ERROR are macros from - // backend_common.h that assist in this management of response - // objects. - - // The backend could iterate over the 'requests' and process each - // one separately. But for performance reasons it is usually - // preferred to create batched input tensors that are processed - // simultaneously. This is especially true for devices like GPUs - // that are capable of exploiting the large amount parallelism - // exposed by larger data sets. - // - // The backend utilities provide a "collector" to facilitate this - // batching process. The 'collector's ProcessTensor function will - // combine a tensor's value from each request in the batch into a - // single contiguous buffer. The buffer can be provided by the - // backend or 'collector' can create and manage it. In this backend, - // there is not a specific buffer into which the batch should be - // created, so use ProcessTensor arguments that cause collector to - // manage it. 
- - BackendInputCollector collector( - requests, request_count, &responses, model_state->TritonMemoryManager(), - false /* pinned_enabled */, nullptr /* stream*/); - - // To instruct ProcessTensor to "gather" the entire batch of IN0 - // input tensors into a single contiguous buffer in CPU memory, set - // the "allowed input types" to be the CPU ones (see tritonserver.h - // in the triton-inference-server/core repo for allowed memory - // types). - std::vector> allowed_input_types = - {{TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}}; - - const char* input_buffer; - size_t input_buffer_byte_size; - TRITONSERVER_MemoryType input_buffer_memory_type; - int64_t input_buffer_memory_type_id; - - RESPOND_ALL_AND_SET_NULL_IF_ERROR( - responses, request_count, - collector.ProcessTensor( - "IN0", nullptr /* existing_buffer */, - 0 /* existing_buffer_byte_size */, allowed_input_types, &input_buffer, - &input_buffer_byte_size, &input_buffer_memory_type, - &input_buffer_memory_type_id)); - - // Finalize the collector. If 'true' is returned, 'input_buffer' - // will not be valid until the backend synchronizes the CUDA - // stream or event that was used when creating the collector. For - // this backend, GPU is not supported and so no CUDA sync should - // be needed; so if 'true' is returned simply log an error. - const bool need_cuda_input_sync = collector.Finalize(); - if (need_cuda_input_sync) { - LOG_MESSAGE( - TRITONSERVER_LOG_ERROR, - "'minimal' backend: unexpected CUDA sync required by collector"); - } - - // 'input_buffer' contains the batched "IN0" tensor. The backend can - // implement whatever logic is necesary to produce "OUT0". This - // backend simply returns the IN0 value in OUT0 so no actual - // computation is needed. - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("model ") + model_state->Name() + ": requests in batch " + - std::to_string(request_count)) - .c_str()); - std::string tstr; - IGNORE_ERROR(BufferAsTypedString( - tstr, input_buffer, input_buffer_byte_size, TRITONSERVER_TYPE_INT32)); - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("batched IN0 value: ") + tstr).c_str()); - - const char* output_buffer = input_buffer; - TRITONSERVER_MemoryType output_buffer_memory_type = input_buffer_memory_type; - int64_t output_buffer_memory_type_id = input_buffer_memory_type_id; - - // This backend supports models that batch along the first dimension - // and those that don't batch. For non-batch models the output shape - // will be [ 4 ]. For batch models the output shape will be [ -1, 4 - // ] and the backend "responder" utility below will set the - // appropriate batch dimension value for each response. - std::vector output_batch_shape; - bool supports_first_dim_batching; - RESPOND_ALL_AND_SET_NULL_IF_ERROR( - responses, request_count, - model_state->SupportsFirstDimBatching(&supports_first_dim_batching)); - if (supports_first_dim_batching) { - output_batch_shape.push_back(-1); - } - output_batch_shape.push_back(4); - - // Because the OUT0 values are concatenated into a single contiguous - // 'output_buffer', the backend must "scatter" them out to the - // individual response OUT0 tensors. The backend utilities provide - // a "responder" to facilitate this scattering process. - - // The 'responders's ProcessTensor function will copy the portion of - // 'output_buffer' corresonding to each request's output into the - // response for that request. 
- - BackendOutputResponder responder( - requests, request_count, &responses, model_state->TritonMemoryManager(), - supports_first_dim_batching, false /* pinned_enabled */, - nullptr /* stream*/); - - responder.ProcessTensor( - "OUT0", TRITONSERVER_TYPE_INT32, output_batch_shape, output_buffer, - output_buffer_memory_type, output_buffer_memory_type_id); - - // Finalize the responder. If 'true' is returned, the OUT0 - // tensors' data will not be valid until the backend synchronizes - // the CUDA stream or event that was used when creating the - // responder. For this backend, GPU is not supported and so no - // CUDA sync should be needed; so if 'true' is returned simply log - // an error. - const bool need_cuda_output_sync = responder.Finalize(); - if (need_cuda_output_sync) { - LOG_MESSAGE( - TRITONSERVER_LOG_ERROR, - "'minimal' backend: unexpected CUDA sync required by responder"); - } - - // Send all the responses that haven't already been sent because of - // an earlier error. - for (auto& response : responses) { - if (response != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr), - "failed to send response"); - } - } - - // Done with the request objects so release them. - for (uint32_t r = 0; r < request_count; ++r) { - auto& request = requests[r]; - LOG_IF_ERROR( - TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL), - "failed releasing request"); - } - - return nullptr; // success -} - -} // extern "C" - -}}} // namespace triton::backend::minimal diff --git a/3rdparty/backend-r22.12/examples/backends/recommended/cmake/TutorialRecommendedBackendConfig.cmake.in b/3rdparty/backend-r22.12/examples/backends/recommended/cmake/TutorialRecommendedBackendConfig.cmake.in deleted file mode 100644 index 4007f9f8d7a4f302be52acc868532b4929739f48..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/recommended/cmake/TutorialRecommendedBackendConfig.cmake.in +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include(CMakeFindDependencyMacro) - -get_filename_component( - TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH -) - -list(APPEND CMAKE_MODULE_PATH ${TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR}) - -if(NOT TARGET TutorialRecommendedBackend::triton-recommended-backend) - include("${TUTORIALRECOMMENDEDBACKEND_CMAKE_DIR}/TutorialRecommendedBackendTargets.cmake") -endif() - -set(TUTORIALRECOMMENDEDBACKEND_LIBRARIES TutorialRecommendedBackend::triton-recommended-backend) diff --git a/3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript b/3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript deleted file mode 100644 index 748714d16fd3a4d028e71216f33da78ff4e6dbe9..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/recommended/src/libtriton_recommended.ldscript +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -{ - global: - TRITONBACKEND_*; - local: *; -}; diff --git a/3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc b/3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc deleted file mode 100644 index 02f46724a96c503c49be2a31288fef342d61bb35..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/backends/recommended/src/recommended.cc +++ /dev/null @@ -1,750 +0,0 @@ -// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/backend/backend_common.h" -#include "triton/backend/backend_input_collector.h" -#include "triton/backend/backend_model.h" -#include "triton/backend/backend_model_instance.h" -#include "triton/backend/backend_output_responder.h" -#include "triton/core/tritonbackend.h" - -namespace triton { namespace backend { namespace recommended { - -// -// Backend that demonstrates the TRITONBACKEND API. This backend works -// for any model that has 1 input with any datatype and any shape and -// 1 output with the same shape and datatype as the input. The backend -// supports both batching and non-batching models. -// -// For each batch of requests, the backend returns the input tensor -// value in the output tensor. -// - -///////////// - -extern "C" { - -// Triton calls TRITONBACKEND_Initialize when a backend is loaded into -// Triton to allow the backend to create and initialize any state that -// is intended to be shared across all models and model instances that -// use the backend. The backend should also verify version -// compatibility with Triton in this function. -// -TRITONSERVER_Error* -TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) -{ - const char* cname; - RETURN_IF_ERROR(TRITONBACKEND_BackendName(backend, &cname)); - std::string name(cname); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("TRITONBACKEND_Initialize: ") + name).c_str()); - - // Check the backend API version that Triton supports vs. what this - // backend was compiled against. Make sure that the Triton major - // version is the same and the minor version is >= what this backend - // uses. - uint32_t api_version_major, api_version_minor; - RETURN_IF_ERROR( - TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor)); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("Triton TRITONBACKEND API version: ") + - std::to_string(api_version_major) + "." 
+ - std::to_string(api_version_minor)) - .c_str()); - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("'") + name + "' TRITONBACKEND API version: " + - std::to_string(TRITONBACKEND_API_VERSION_MAJOR) + "." + - std::to_string(TRITONBACKEND_API_VERSION_MINOR)) - .c_str()); - - if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) || - (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "triton backend API version does not support this backend"); - } - - // The backend configuration may contain information needed by the - // backend, such as tritonserver command-line arguments. This - // backend doesn't use any such configuration but for this example - // print whatever is available. - TRITONSERVER_Message* backend_config_message; - RETURN_IF_ERROR( - TRITONBACKEND_BackendConfig(backend, &backend_config_message)); - - const char* buffer; - size_t byte_size; - RETURN_IF_ERROR(TRITONSERVER_MessageSerializeToJson( - backend_config_message, &buffer, &byte_size)); - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("backend configuration:\n") + buffer).c_str()); - - // This backend does not require any "global" state but as an - // example create a string to demonstrate. - std::string* state = new std::string("backend state"); - RETURN_IF_ERROR( - TRITONBACKEND_BackendSetState(backend, reinterpret_cast(state))); - - return nullptr; // success -} - -// Triton calls TRITONBACKEND_Finalize when a backend is no longer -// needed. -// -TRITONSERVER_Error* -TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend) -{ - // Delete the "global" state associated with the backend. - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate)); - std::string* state = reinterpret_cast(vstate); - - LOG_MESSAGE( - TRITONSERVER_LOG_INFO, - (std::string("TRITONBACKEND_Finalize: state is '") + *state + "'") - .c_str()); - - delete state; - - return nullptr; // success -} - -} // extern "C" - -///////////// - -// -// ModelState -// -// State associated with a model that is using this backend. An object -// of this class is created and associated with each -// TRITONBACKEND_Model. ModelState is derived from BackendModel class -// provided in the backend utilities that provides many common -// functions. -// -class ModelState : public BackendModel { - public: - static TRITONSERVER_Error* Create( - TRITONBACKEND_Model* triton_model, ModelState** state); - virtual ~ModelState() = default; - - // Name of the input and output tensor - const std::string& InputTensorName() const { return input_name_; } - const std::string& OutputTensorName() const { return output_name_; } - - // Datatype of the input and output tensor - TRITONSERVER_DataType TensorDataType() const { return datatype_; } - - // Shape of the input and output tensor as given in the model - // configuration file. This shape will not include the batch - // dimension (if the model has one). - const std::vector& TensorNonBatchShape() const { return nb_shape_; } - - // Shape of the input and output tensor, including the batch - // dimension (if the model has one). This method cannot be called - // until the model is completely loaded and initialized, including - // all instances of the model. In practice, this means that backend - // should only call it in TRITONBACKEND_ModelInstanceExecute. - TRITONSERVER_Error* TensorShape(std::vector& shape); - - // Validate that this model is supported by this backend. 
- TRITONSERVER_Error* ValidateModelConfig(); - - private: - ModelState(TRITONBACKEND_Model* triton_model); - - std::string input_name_; - std::string output_name_; - - TRITONSERVER_DataType datatype_; - - bool shape_initialized_; - std::vector nb_shape_; - std::vector shape_; -}; - -ModelState::ModelState(TRITONBACKEND_Model* triton_model) - : BackendModel(triton_model), shape_initialized_(false) -{ - // Validate that the model's configuration matches what is supported - // by this backend. - THROW_IF_BACKEND_MODEL_ERROR(ValidateModelConfig()); -} - -TRITONSERVER_Error* -ModelState::Create(TRITONBACKEND_Model* triton_model, ModelState** state) -{ - try { - *state = new ModelState(triton_model); - } - catch (const BackendModelException& ex) { - RETURN_ERROR_IF_TRUE( - ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL, - std::string("unexpected nullptr in BackendModelException")); - RETURN_IF_ERROR(ex.err_); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -ModelState::TensorShape(std::vector& shape) -{ - // This backend supports models that batch along the first dimension - // and those that don't batch. For non-batch models the output shape - // will be the shape from the model configuration. For batch models - // the output shape will be the shape from the model configuration - // prepended with [ -1 ] to represent the batch dimension. The - // backend "responder" utility used below will set the appropriate - // batch dimension value for each response. The shape needs to be - // initialized lazily because the SupportsFirstDimBatching function - // cannot be used until the model is completely loaded. - if (!shape_initialized_) { - bool supports_first_dim_batching; - RETURN_IF_ERROR(SupportsFirstDimBatching(&supports_first_dim_batching)); - if (supports_first_dim_batching) { - shape_.push_back(-1); - } - - shape_.insert(shape_.end(), nb_shape_.begin(), nb_shape_.end()); - shape_initialized_ = true; - } - - shape = shape_; - - return nullptr; // success -} - -TRITONSERVER_Error* -ModelState::ValidateModelConfig() -{ - // If verbose logging is enabled, dump the model's configuration as - // JSON into the console output. - if (TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_VERBOSE)) { - common::TritonJson::WriteBuffer buffer; - RETURN_IF_ERROR(ModelConfig().PrettyWrite(&buffer)); - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("model configuration:\n") + buffer.Contents()).c_str()); - } - - // ModelConfig is the model configuration as a TritonJson - // object. Use the TritonJson utilities to parse the JSON and - // determine if the configuration is supported by this backend. - common::TritonJson::Value inputs, outputs; - RETURN_IF_ERROR(ModelConfig().MemberAsArray("input", &inputs)); - RETURN_IF_ERROR(ModelConfig().MemberAsArray("output", &outputs)); - - // The model must have exactly 1 input and 1 output. - RETURN_ERROR_IF_FALSE( - inputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG, - std::string("model configuration must have 1 input")); - RETURN_ERROR_IF_FALSE( - outputs.ArraySize() == 1, TRITONSERVER_ERROR_INVALID_ARG, - std::string("model configuration must have 1 output")); - - common::TritonJson::Value input, output; - RETURN_IF_ERROR(inputs.IndexAsObject(0, &input)); - RETURN_IF_ERROR(outputs.IndexAsObject(0, &output)); - - // Record the input and output name in the model state. 
- const char* input_name; - size_t input_name_len; - RETURN_IF_ERROR(input.MemberAsString("name", &input_name, &input_name_len)); - input_name_ = std::string(input_name); - - const char* output_name; - size_t output_name_len; - RETURN_IF_ERROR( - output.MemberAsString("name", &output_name, &output_name_len)); - output_name_ = std::string(output_name); - - // Input and output must have same datatype - std::string input_dtype, output_dtype; - RETURN_IF_ERROR(input.MemberAsString("data_type", &input_dtype)); - RETURN_IF_ERROR(output.MemberAsString("data_type", &output_dtype)); - RETURN_ERROR_IF_FALSE( - input_dtype == output_dtype, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected input and output datatype to match, got ") + - input_dtype + " and " + output_dtype); - datatype_ = ModelConfigDataTypeToTritonServerDataType(input_dtype); - - // Input and output must have same shape. Reshape is not supported - // on either input or output so flag an error is the model - // configuration uses it. - triton::common::TritonJson::Value reshape; - RETURN_ERROR_IF_TRUE( - input.Find("reshape", &reshape), TRITONSERVER_ERROR_UNSUPPORTED, - std::string("reshape not supported for input tensor")); - RETURN_ERROR_IF_TRUE( - output.Find("reshape", &reshape), TRITONSERVER_ERROR_UNSUPPORTED, - std::string("reshape not supported for output tensor")); - - std::vector input_shape, output_shape; - RETURN_IF_ERROR(backend::ParseShape(input, "dims", &input_shape)); - RETURN_IF_ERROR(backend::ParseShape(output, "dims", &output_shape)); - - RETURN_ERROR_IF_FALSE( - input_shape == output_shape, TRITONSERVER_ERROR_INVALID_ARG, - std::string("expected input and output shape to match, got ") + - backend::ShapeToString(input_shape) + " and " + - backend::ShapeToString(output_shape)); - - nb_shape_ = input_shape; - - return nullptr; // success -} - -extern "C" { - -// Triton calls TRITONBACKEND_ModelInitialize when a model is loaded -// to allow the backend to create any state associated with the model, -// and to also examine the model configuration to determine if the -// configuration is suitable for the backend. Any errors reported by -// this function will prevent the model from loading. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) -{ - // Create a ModelState object and associate it with the - // TRITONBACKEND_Model. If anything goes wrong with initialization - // of the model state then an error is returned and Triton will fail - // to load the model. - ModelState* model_state; - RETURN_IF_ERROR(ModelState::Create(model, &model_state)); - RETURN_IF_ERROR( - TRITONBACKEND_ModelSetState(model, reinterpret_cast(model_state))); - - return nullptr; // success -} - -// Triton calls TRITONBACKEND_ModelFinalize when a model is no longer -// needed. The backend should cleanup any state associated with the -// model. This function will not be called until all model instances -// of the model have been finalized. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) -{ - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vstate)); - ModelState* model_state = reinterpret_cast(vstate); - delete model_state; - - return nullptr; // success -} - -} // extern "C" - -///////////// - -// -// ModelInstanceState -// -// State associated with a model instance. An object of this class is -// created and associated with each -// TRITONBACKEND_ModelInstance. 
ModelInstanceState is derived from -// BackendModelInstance class provided in the backend utilities that -// provides many common functions. -// -class ModelInstanceState : public BackendModelInstance { - public: - static TRITONSERVER_Error* Create( - ModelState* model_state, - TRITONBACKEND_ModelInstance* triton_model_instance, - ModelInstanceState** state); - virtual ~ModelInstanceState() = default; - - // Get the state of the model that corresponds to this instance. - ModelState* StateForModel() const { return model_state_; } - - private: - ModelInstanceState( - ModelState* model_state, - TRITONBACKEND_ModelInstance* triton_model_instance) - : BackendModelInstance(model_state, triton_model_instance), - model_state_(model_state) - { - } - - ModelState* model_state_; -}; - -TRITONSERVER_Error* -ModelInstanceState::Create( - ModelState* model_state, TRITONBACKEND_ModelInstance* triton_model_instance, - ModelInstanceState** state) -{ - try { - *state = new ModelInstanceState(model_state, triton_model_instance); - } - catch (const BackendModelInstanceException& ex) { - RETURN_ERROR_IF_TRUE( - ex.err_ == nullptr, TRITONSERVER_ERROR_INTERNAL, - std::string("unexpected nullptr in BackendModelInstanceException")); - RETURN_IF_ERROR(ex.err_); - } - - return nullptr; // success -} - -extern "C" { - -// Triton calls TRITONBACKEND_ModelInstanceInitialize when a model -// instance is created to allow the backend to initialize any state -// associated with the instance. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) -{ - // Get the model state associated with this instance's model. - TRITONBACKEND_Model* model; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceModel(instance, &model)); - - void* vmodelstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelState(model, &vmodelstate)); - ModelState* model_state = reinterpret_cast(vmodelstate); - - // Create a ModelInstanceState object and associate it with the - // TRITONBACKEND_ModelInstance. - ModelInstanceState* instance_state; - RETURN_IF_ERROR( - ModelInstanceState::Create(model_state, instance, &instance_state)); - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceSetState( - instance, reinterpret_cast(instance_state))); - - return nullptr; // success -} - -// Triton calls TRITONBACKEND_ModelInstanceFinalize when a model -// instance is no longer needed. The backend should cleanup any state -// associated with the model instance. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) -{ - void* vstate; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate)); - ModelInstanceState* instance_state = - reinterpret_cast(vstate); - delete instance_state; - - return nullptr; // success -} - -} // extern "C" - -///////////// - -extern "C" { - -// When Triton calls TRITONBACKEND_ModelInstanceExecute it is required -// that a backend create a response for each request in the batch. A -// response may be the output tensors required for that request or may -// be an error that is returned in the response. -// -TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceExecute( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, - const uint32_t request_count) -{ - // Collect various timestamps during the execution of this batch or - // requests. These values are reported below before returning from - // the function. 
- - uint64_t exec_start_ns = 0; - SET_TIMESTAMP(exec_start_ns); - - // Triton will not call this function simultaneously for the same - // 'instance'. But since this backend could be used by multiple - // instances from multiple models the implementation needs to handle - // multiple calls to this function at the same time (with different - // 'instance' objects). Best practice for a high-performance - // implementation is to avoid introducing mutex/lock and instead use - // only function-local and model-instance-specific state. - ModelInstanceState* instance_state; - RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState( - instance, reinterpret_cast(&instance_state))); - ModelState* model_state = instance_state->StateForModel(); - - // 'responses' is initialized as a parallel array to 'requests', - // with one TRITONBACKEND_Response object for each - // TRITONBACKEND_Request object. If something goes wrong while - // creating these response objects, the backend simply returns an - // error from TRITONBACKEND_ModelInstanceExecute, indicating to - // Triton that this backend did not create or send any responses and - // so it is up to Triton to create and send an appropriate error - // response for each request. RETURN_IF_ERROR is one of several - // useful macros for error handling that can be found in - // backend_common.h. - - std::vector responses; - responses.reserve(request_count); - for (uint32_t r = 0; r < request_count; ++r) { - TRITONBACKEND_Request* request = requests[r]; - TRITONBACKEND_Response* response; - RETURN_IF_ERROR(TRITONBACKEND_ResponseNew(&response, request)); - responses.push_back(response); - } - - // At this point, the backend takes ownership of 'requests', which - // means that it is responsible for sending a response for every - // request. From here, even if something goes wrong in processing, - // the backend must return 'nullptr' from this function to indicate - // success. Any errors and failures must be communicated via the - // response objects. - // - // To simplify error handling, the backend utilities manage - // 'responses' in a specific way and it is recommended that backends - // follow this same pattern. When an error is detected in the - // processing of a request, an appropriate error response is sent - // and the corresponding TRITONBACKEND_Response object within - // 'responses' is set to nullptr to indicate that the - // request/response has already been handled and no futher processing - // should be performed for that request. Even if all responses fail, - // the backend still allows execution to flow to the end of the - // function so that statistics are correctly reported by the calls - // to TRITONBACKEND_ModelInstanceReportStatistics and - // TRITONBACKEND_ModelInstanceReportBatchStatistics. - // RESPOND_AND_SET_NULL_IF_ERROR, and - // RESPOND_ALL_AND_SET_NULL_IF_ERROR are macros from - // backend_common.h that assist in this management of response - // objects. - - // The backend could iterate over the 'requests' and process each - // one separately. But for performance reasons it is usually - // preferred to create batched input tensors that are processed - // simultaneously. This is especially true for devices like GPUs - // that are capable of exploiting the large amount parallelism - // exposed by larger data sets. - // - // The backend utilities provide a "collector" to facilitate this - // batching process. 
-  // The 'collector's ProcessTensor function will
-  // combine a tensor's value from each request in the batch into a
-  // single contiguous buffer. The buffer can be provided by the
-  // backend or 'collector' can create and manage it. In this backend,
-  // there is not a specific buffer into which the batch should be
-  // created, so use ProcessTensor arguments that cause collector to
-  // manage it. ProcessTensor does NOT support TRITONSERVER_TYPE_BYTES
-  // data type.
-
-  BackendInputCollector collector(
-      requests, request_count, &responses, model_state->TritonMemoryManager(),
-      false /* pinned_enabled */, nullptr /* stream*/);
-
-  // To instruct ProcessTensor to "gather" the entire batch of input
-  // tensors into a single contiguous buffer in CPU memory, set the
-  // "allowed input types" to be the CPU ones (see tritonserver.h in
-  // the triton-inference-server/core repo for allowed memory types).
-  std::vector<std::pair<TRITONSERVER_MemoryType, int64_t>> allowed_input_types =
-      {{TRITONSERVER_MEMORY_CPU_PINNED, 0}, {TRITONSERVER_MEMORY_CPU, 0}};
-
-  const char* input_buffer;
-  size_t input_buffer_byte_size;
-  TRITONSERVER_MemoryType input_buffer_memory_type;
-  int64_t input_buffer_memory_type_id;
-
-  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
-      responses, request_count,
-      collector.ProcessTensor(
-          model_state->InputTensorName().c_str(), nullptr /* existing_buffer */,
-          0 /* existing_buffer_byte_size */, allowed_input_types, &input_buffer,
-          &input_buffer_byte_size, &input_buffer_memory_type,
-          &input_buffer_memory_type_id));
-
-  // Finalize the collector. If 'true' is returned, 'input_buffer'
-  // will not be valid until the backend synchronizes the CUDA
-  // stream or event that was used when creating the collector. For
-  // this backend, GPU is not supported and so no CUDA sync should
-  // be needed; so if 'true' is returned simply log an error.
-  const bool need_cuda_input_sync = collector.Finalize();
-  if (need_cuda_input_sync) {
-    LOG_MESSAGE(
-        TRITONSERVER_LOG_ERROR,
-        "'recommended' backend: unexpected CUDA sync required by collector");
-  }
-
-  // 'input_buffer' contains the batched input tensor. The backend can
-  // implement whatever logic is necessary to produce the output
-  // tensor. This backend simply logs the input tensor value and then
-  // returns the input tensor value in the output tensor so no actual
-  // computation is needed.
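As an aside, a minimal sketch of what a real computation could look like at this point, assuming the tensor datatype is FP32; the 'computed' buffer and the per-element doubling below are purely illustrative and are not part of the deleted backend source:

  // Illustrative only: interpret the gathered batch as FP32 and apply a
  // per-element computation into a locally owned buffer. 'input_buffer' and
  // 'input_buffer_byte_size' come from the ProcessTensor call above.
  std::vector<char> computed(input_buffer_byte_size);
  const float* src = reinterpret_cast<const float*>(input_buffer);
  float* dst = reinterpret_cast<float*>(computed.data());
  for (size_t i = 0; i < input_buffer_byte_size / sizeof(float); ++i) {
    dst[i] = 2.0f * src[i];  // e.g. scale every element
  }
  // 'computed.data()' would then be handed to the responder as the output
  // buffer instead of simply echoing 'input_buffer'.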
-
-  uint64_t compute_start_ns = 0;
-  SET_TIMESTAMP(compute_start_ns);
-
-  LOG_MESSAGE(
-      TRITONSERVER_LOG_INFO,
-      (std::string("model ") + model_state->Name() + ": requests in batch " +
-       std::to_string(request_count))
-          .c_str());
-  std::string tstr;
-  IGNORE_ERROR(BufferAsTypedString(
-      tstr, input_buffer, input_buffer_byte_size,
-      model_state->TensorDataType()));
-  LOG_MESSAGE(
-      TRITONSERVER_LOG_INFO,
-      (std::string("batched " + model_state->InputTensorName() + " value: ") +
-       tstr)
-          .c_str());
-
-  const char* output_buffer = input_buffer;
-  TRITONSERVER_MemoryType output_buffer_memory_type = input_buffer_memory_type;
-  int64_t output_buffer_memory_type_id = input_buffer_memory_type_id;
-
-  uint64_t compute_end_ns = 0;
-  SET_TIMESTAMP(compute_end_ns);
-
-  bool supports_first_dim_batching;
-  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
-      responses, request_count,
-      model_state->SupportsFirstDimBatching(&supports_first_dim_batching));
-
-  std::vector<int64_t> tensor_shape;
-  RESPOND_ALL_AND_SET_NULL_IF_ERROR(
-      responses, request_count, model_state->TensorShape(tensor_shape));
-
-  // Because the output tensor values are concatenated into a single
-  // contiguous 'output_buffer', the backend must "scatter" them out
-  // to the individual response output tensors. The backend utilities
-  // provide a "responder" to facilitate this scattering process.
-  // BackendOutputResponder does NOT support TRITONSERVER_TYPE_BYTES
-  // data type.
-
-  // The 'responder's ProcessTensor function will copy the portion of
-  // 'output_buffer' corresponding to each request's output into the
-  // response for that request.
-
-  BackendOutputResponder responder(
-      requests, request_count, &responses, model_state->TritonMemoryManager(),
-      supports_first_dim_batching, false /* pinned_enabled */,
-      nullptr /* stream*/);
-
-  responder.ProcessTensor(
-      model_state->OutputTensorName().c_str(), model_state->TensorDataType(),
-      tensor_shape, output_buffer, output_buffer_memory_type,
-      output_buffer_memory_type_id);
-
-  // Finalize the responder. If 'true' is returned, the output
-  // tensors' data will not be valid until the backend synchronizes
-  // the CUDA stream or event that was used when creating the
-  // responder. For this backend, GPU is not supported and so no CUDA
-  // sync should be needed; so if 'true' is returned simply log an
-  // error.
-  const bool need_cuda_output_sync = responder.Finalize();
-  if (need_cuda_output_sync) {
-    LOG_MESSAGE(
-        TRITONSERVER_LOG_ERROR,
-        "'recommended' backend: unexpected CUDA sync required by responder");
-  }
-
-  // Send all the responses that haven't already been sent because of
-  // an earlier error.
-  for (auto& response : responses) {
-    if (response != nullptr) {
-      LOG_IF_ERROR(
-          TRITONBACKEND_ResponseSend(
-              response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, nullptr),
-          "failed to send response");
-    }
-  }
-
-  uint64_t exec_end_ns = 0;
-  SET_TIMESTAMP(exec_end_ns);
-
-#ifdef TRITON_ENABLE_STATS
-  // For batch statistics we need to know the total batch size of the
-  // requests. This is not necessarily just the number of requests,
-  // because if the model supports batching then any request can be a
-  // batched request itself.
- size_t total_batch_size = 0; - if (!supports_first_dim_batching) { - total_batch_size = request_count; - } else { - for (uint32_t r = 0; r < request_count; ++r) { - auto& request = requests[r]; - TRITONBACKEND_Input* input = nullptr; - LOG_IF_ERROR( - TRITONBACKEND_RequestInputByIndex(request, 0 /* index */, &input), - "failed getting request input"); - if (input != nullptr) { - const int64_t* shape = nullptr; - LOG_IF_ERROR( - TRITONBACKEND_InputProperties( - input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr), - "failed getting input properties"); - if (shape != nullptr) { - total_batch_size += shape[0]; - } - } - } - } -#else - (void)exec_start_ns; - (void)exec_end_ns; - (void)compute_start_ns; - (void)compute_end_ns; -#endif // TRITON_ENABLE_STATS - - // Report statistics for each request, and then release the request. - for (uint32_t r = 0; r < request_count; ++r) { - auto& request = requests[r]; - -#ifdef TRITON_ENABLE_STATS - LOG_IF_ERROR( - TRITONBACKEND_ModelInstanceReportStatistics( - instance_state->TritonModelInstance(), request, - (responses[r] != nullptr) /* success */, exec_start_ns, - compute_start_ns, compute_end_ns, exec_end_ns), - "failed reporting request statistics"); -#endif // TRITON_ENABLE_STATS - - LOG_IF_ERROR( - TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL), - "failed releasing request"); - } - -#ifdef TRITON_ENABLE_STATS - // Report batch statistics. - LOG_IF_ERROR( - TRITONBACKEND_ModelInstanceReportBatchStatistics( - instance_state->TritonModelInstance(), total_batch_size, - exec_start_ns, compute_start_ns, compute_end_ns, exec_end_ns), - "failed reporting batch request statistics"); -#endif // TRITON_ENABLE_STATS - - return nullptr; // success -} - -} // extern "C" - -}}} // namespace triton::backend::recommended diff --git a/3rdparty/backend-r22.12/examples/clients/bls_client b/3rdparty/backend-r22.12/examples/clients/bls_client deleted file mode 100644 index 82090901d7b06ee17e5d511eae775c4b84a451d5..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/clients/bls_client +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/python -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import sys -import argparse -import numpy as np -import tritonhttpclient as httpclient -from tritonclientutils import np_to_triton_dtype - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('-u', - '--url', - type=str, - required=False, - default='localhost:8000', - help='Inference server URL. Default is localhost:8000.') - FLAGS = parser.parse_args() - - model_name = "bls_fp32" - shape = [16] - with httpclient.InferenceServerClient(url=FLAGS.url) as client: - input0_data = np.random.rand(*shape).astype(np.float32) - input1_data = np.random.rand(*shape).astype(np.float32) - inputs = [ - httpclient.InferInput("INPUT0", input0_data.shape, - np_to_triton_dtype(input0_data.dtype)), - httpclient.InferInput("INPUT1", input1_data.shape, - np_to_triton_dtype(input1_data.dtype)), - ] - - inputs[0].set_data_from_numpy(input0_data) - inputs[1].set_data_from_numpy(input1_data) - - outputs = [ - httpclient.InferRequestedOutput("OUTPUT0"), - httpclient.InferRequestedOutput("OUTPUT1"), - ] - response = client.infer(model_name, - inputs, - request_id=str(1), - outputs=outputs) - - result = response.get_response() - output0_data = response.as_numpy("OUTPUT0") - output1_data = response.as_numpy("OUTPUT1") - - print("INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format( - input0_data, input1_data, output0_data)) - print("INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format( - input0_data, input1_data, output1_data)) - - if not np.allclose(input0_data + input1_data, output0_data): - print("error: incorrect sum") - sys.exit(1) - - if not np.allclose(input0_data - input1_data, output1_data): - print("error: incorrect difference") - sys.exit(1) - - print('\nPASS') - sys.exit(0) diff --git a/3rdparty/backend-r22.12/examples/clients/minimal_client b/3rdparty/backend-r22.12/examples/clients/minimal_client deleted file mode 100644 index ffead3480ffa50270d73495d8a9358e2f1f2ea79..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/clients/minimal_client +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/env python -# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse -import numpy as np - -import tritonclient.http as httpclient -from tritonclient.utils import InferenceServerException - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('-u', - '--url', - type=str, - required=False, - default='localhost:8000', - help='Inference server URL. Default is localhost:8000.') - FLAGS = parser.parse_args() - - # For the HTTP client, need to specify large enough concurrency to - # issue all the inference requests to the server in parallel. For - # this example we want to be able to send 2 requests concurrently. - try: - concurrent_request_count = 2 - triton_client = httpclient.InferenceServerClient( - url=FLAGS.url, concurrency=concurrent_request_count) - except Exception as e: - print("channel creation failed: " + str(e)) - sys.exit(1) - - # First send a single request to the nonbatching model. - print('=========') - input0_data = np.array([ 1, 2, 3, 4 ], dtype=np.int32) - print('Sending request to nonbatching model: IN0 = {}'.format(input0_data)) - - inputs = [ httpclient.InferInput('IN0', [4], "INT32") ] - inputs[0].set_data_from_numpy(input0_data) - result = triton_client.infer('nonbatching', inputs) - - print('Response: {}'.format(result.get_response())) - print('OUT0 = {}'.format(result.as_numpy('OUT0'))) - - # Send 2 requests to the batching model. Because these are sent - # asynchronously and Triton's dynamic batcher is configured to - # delay up to 5 seconds when forming a batch for this model, we - # expect these 2 requests to be batched within Triton and sent to - # the minimal backend as a single batch. - print('\n=========') - async_requests = [] - - input0_data = np.array([[ 10, 11, 12, 13 ]], dtype=np.int32) - print('Sending request to batching model: IN0 = {}'.format(input0_data)) - inputs = [ httpclient.InferInput('IN0', [1, 4], "INT32") ] - inputs[0].set_data_from_numpy(input0_data) - async_requests.append(triton_client.async_infer('batching', inputs)) - - input0_data = np.array([[ 20, 21, 22, 23 ]], dtype=np.int32) - print('Sending request to batching model: IN0 = {}'.format(input0_data)) - inputs = [ httpclient.InferInput('IN0', [1, 4], "INT32") ] - inputs[0].set_data_from_numpy(input0_data) - async_requests.append(triton_client.async_infer('batching', inputs)) - - for async_request in async_requests: - # Get the result from the initiated asynchronous inference - # request. This call will block till the server responds. 
- result = async_request.get_result() - print('Response: {}'.format(result.get_response())) - print('OUT0 = {}'.format(result.as_numpy('OUT0'))) diff --git a/3rdparty/backend-r22.12/examples/clients/recommended_client b/3rdparty/backend-r22.12/examples/clients/recommended_client deleted file mode 100644 index 4a586d2b6d7247e1d715dd7d7e0e41a1a877227f..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/clients/recommended_client +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python -# Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse -import numpy as np - -import tritonclient.http as httpclient -from tritonclient.utils import InferenceServerException - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('-u', - '--url', - type=str, - required=False, - default='localhost:8000', - help='Inference server URL. Default is localhost:8000.') - FLAGS = parser.parse_args() - - # For the HTTP client, need to specify large enough concurrency to - # issue all the inference requests to the server in parallel. For - # this example we want to be able to send 2 requests concurrently. - try: - concurrent_request_count = 2 - triton_client = httpclient.InferenceServerClient( - url=FLAGS.url, concurrency=concurrent_request_count) - except Exception as e: - print("channel creation failed: " + str(e)) - sys.exit(1) - - # Send 2 requests to the batching model. Because these are sent - # asynchronously and Triton's dynamic batcher is configured to - # delay up to 5 seconds when forming a batch for this model, we - # expect these 2 requests to be batched within Triton and sent to - # the backend as a single batch. - # - # The recommended backend can handle any model with 1 input and 1 - # output as long as the input and output datatype and shape are - # the same. The batching model uses datatype FP32 and shape - # [ 4, 4 ]. 
- print('\n=========') - async_requests = [] - - input0_data = np.array([[[ 1.0, 1.1, 1.2, 1.3 ], - [ 2.0, 2.1, 2.2, 2.3 ], - [ 3.0, 3.1, 3.2, 3.3 ], - [ 4.0, 4.1, 4.2, 4.3 ]]], dtype=np.float32) - print('Sending request to batching model: input = {}'.format(input0_data)) - inputs = [ httpclient.InferInput('INPUT', [1, 4, 4], "FP32") ] - inputs[0].set_data_from_numpy(input0_data) - async_requests.append(triton_client.async_infer('batching', inputs)) - - input0_data = np.array([[[ 10.0, 10.1, 10.2, 10.3 ], - [ 20.0, 20.1, 20.2, 20.3 ], - [ 30.0, 30.1, 30.2, 30.3 ], - [ 40.0, 40.1, 40.2, 40.3 ]]], dtype=np.float32) - print('Sending request to batching model: input = {}'.format(input0_data)) - inputs = [ httpclient.InferInput('INPUT', [1, 4, 4], "FP32") ] - inputs[0].set_data_from_numpy(input0_data) - async_requests.append(triton_client.async_infer('batching', inputs)) - - for async_request in async_requests: - # Get the result from the initiated asynchronous inference - # request. This call will block till the server responds. - result = async_request.get_result() - print('Response: {}'.format(result.get_response())) - print('OUTPUT = {}'.format(result.as_numpy('OUTPUT'))) diff --git a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py b/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py deleted file mode 100644 index f0ef2c8b205c8b8425074d04617a8b624fd93347..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/1/model.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json -import triton_python_backend_utils as pb_utils - -# This model calculates the sum and difference of the INPUT0 and INPUT1 and put -# the results in OUTPUT0 and OUTPUT1 respectively. For more information -# regarding how this model.py was written, please refer to Python Backend. 
-class TritonPythonModel: - - def initialize(self, args): - self.model_config = model_config = json.loads(args['model_config']) - - output0_config = pb_utils.get_output_config_by_name( - model_config, "OUTPUT0") - - output1_config = pb_utils.get_output_config_by_name( - model_config, "OUTPUT1") - - self.output0_dtype = pb_utils.triton_string_to_numpy( - output0_config['data_type']) - self.output1_dtype = pb_utils.triton_string_to_numpy( - output1_config['data_type']) - - def execute(self, requests): - output0_dtype = self.output0_dtype - output1_dtype = self.output1_dtype - - responses = [] - - for request in requests: - in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") - in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - - out_0, out_1 = (in_0.as_numpy() + in_1.as_numpy(), - in_0.as_numpy() - in_1.as_numpy()) - - out_tensor_0 = pb_utils.Tensor("OUTPUT0", - out_0.astype(output0_dtype)) - out_tensor_1 = pb_utils.Tensor("OUTPUT1", - out_1.astype(output1_dtype)) - - inference_response = pb_utils.InferenceResponse( - output_tensors=[out_tensor_0, out_tensor_1]) - responses.append(inference_response) - - return responses - - def finalize(self): - print('Cleaning up...') diff --git a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt b/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt deleted file mode 100644 index a0025a0ed1ce985467709b814ce2ba8a38bc4829..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_python/config.pbtxt +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -name: "addsub_python" -backend: "python" -max_batch_size: 0 - -input [ - { - name: "INPUT0" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -input [ - { - name: "INPUT1" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT1" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] diff --git a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb b/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb deleted file mode 100644 index 7a7cc038c720d6293b9ee48f2b5a68cc1bc6a7a7..0000000000000000000000000000000000000000 Binary files a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/1/model.savedmodel/saved_model.pb and /dev/null differ diff --git a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt b/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt deleted file mode 100644 index ec176a0bd6e873f3c7f41da761576fc30c16ba3e..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/bls_models/addsub_tf/config.pbtxt +++ /dev/null @@ -1,28 +0,0 @@ -name: "addsub_tf" -platform: "tensorflow_savedmodel" -max_batch_size: 0 - -input [ - { - name: "INPUT0" - data_type: TYPE_FP32 - dims: [ 16 ] - }, - { - name: "INPUT1" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP32 - dims: [ 16 ] - }, - { - name: "OUTPUT1" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] diff --git a/3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt b/3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt deleted file mode 100644 index f8c6c067bc4e75369da107e571b0282e1eb73d59..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/bls_models/bls_fp32/config.pbtxt +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -name: "bls_fp32" -backend: "bls" -max_batch_size: 0 - -input [ - { - name: "INPUT0" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -input [ - { - name: "INPUT1" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -output [ - { - name: "OUTPUT1" - data_type: TYPE_FP32 - dims: [ 16 ] - } -] -instance_group [ - { - kind: KIND_CPU - } -] diff --git a/3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep b/3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/1/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt b/3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt deleted file mode 100644 index f7423fb542c73fbc090ea99fa5bdc58cb03a664c..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/minimal_models/batching/config.pbtxt +++ /dev/null @@ -1,24 +0,0 @@ -backend: "minimal" -max_batch_size: 8 -dynamic_batching { - max_queue_delay_microseconds: 5000000 -} -input [ - { - name: "IN0" - data_type: TYPE_INT32 - dims: [ 4 ] - } -] -output [ - { - name: "OUT0" - data_type: TYPE_INT32 - dims: [ 4 ] - } -] -instance_group [ - { - kind: KIND_CPU - } -] diff --git a/3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep b/3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/1/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt b/3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt deleted file mode 100644 index 01d75a9785e00e8c1d1b18d15b57c49dd908ec93..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/minimal_models/nonbatching/config.pbtxt +++ /dev/null @@ -1,21 +0,0 @@ -backend: "minimal" -max_batch_size: 0 -input [ - { - name: "IN0" - data_type: TYPE_INT32 - dims: [ 4 ] - } -] -output [ - { - name: "OUT0" - data_type: TYPE_INT32 - dims: [ 4 ] - } -] -instance_group [ - { - kind: KIND_CPU - } -] diff --git a/3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep b/3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/1/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt b/3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt deleted file mode 100644 index 917ebf27ff133d5466d9f7c7618b1839987391a0..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/examples/model_repos/recommended_models/batching/config.pbtxt +++ /dev/null @@ -1,24 +0,0 @@ -backend: "recommended" -max_batch_size: 8 -dynamic_batching { - max_queue_delay_microseconds: 5000000 -} -input [ - { - name: "INPUT" - data_type: TYPE_FP32 - dims: [ 4, 4 ] - } -] -output [ - { - name: "OUTPUT" - data_type: TYPE_FP32 - dims: [ 4, 4 ] - } -] -instance_group [ - { - kind: KIND_CPU - } -] diff --git a/3rdparty/backend-r22.12/include/triton/backend/backend_common.h b/3rdparty/backend-r22.12/include/triton/backend/backend_common.h deleted file mode 100644 index 
aad3a5a4db48b6cb81700adacccb377375082ce6..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/include/triton/backend/backend_common.h +++ /dev/null @@ -1,672 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "triton/common/error.h" -#include "triton/core/tritonbackend.h" - -#define TRITONJSON_STATUSTYPE TRITONSERVER_Error* -#define TRITONJSON_STATUSRETURN(M) \ - return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str()) -#define TRITONJSON_STATUSSUCCESS nullptr -#include "triton/common/triton_json.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace backend { - -#define IGNORE_ERROR(X) \ - do { \ - TRITONSERVER_Error* ie_err__ = (X); \ - if (ie_err__ != nullptr) { \ - TRITONSERVER_ErrorDelete(ie_err__); \ - } \ - } while (false) - -#define LOG_IF_ERROR(X, MSG) \ - do { \ - TRITONSERVER_Error* lie_err__ = (X); \ - if (lie_err__ != nullptr) { \ - IGNORE_ERROR(TRITONSERVER_LogMessage( \ - TRITONSERVER_LOG_INFO, __FILE__, __LINE__, \ - (std::string(MSG) + ": " + TRITONSERVER_ErrorCodeString(lie_err__) + \ - " - " + TRITONSERVER_ErrorMessage(lie_err__)) \ - .c_str())); \ - TRITONSERVER_ErrorDelete(lie_err__); \ - } \ - } while (false) - -#define LOG_MESSAGE(LEVEL, MSG) \ - do { \ - LOG_IF_ERROR( \ - TRITONSERVER_LogMessage(LEVEL, __FILE__, __LINE__, MSG), \ - ("failed to log message: ")); \ - } while (false) - - -#define RETURN_ERROR_IF_FALSE(P, C, MSG) \ - do { \ - if (!(P)) { \ - return TRITONSERVER_ErrorNew(C, (MSG).c_str()); \ - } \ - } while (false) - -#define RETURN_ERROR_IF_TRUE(P, C, MSG) \ - do { \ - if ((P)) { \ - return TRITONSERVER_ErrorNew(C, (MSG).c_str()); \ - } \ - } while (false) - -#define RETURN_IF_ERROR(X) \ - do { \ - TRITONSERVER_Error* rie_err__ = (X); \ - if (rie_err__ != nullptr) { \ - return rie_err__; \ - } \ - } while (false) - -#ifdef TRITON_ENABLE_GPU -#define 
LOG_IF_CUDA_ERROR(X, MSG) \ - do { \ - cudaError_t lice_err__ = (X); \ - if (lice_err__ != cudaSuccess) { \ - IGNORE_ERROR(TRITONSERVER_LogMessage( \ - TRITONSERVER_LOG_INFO, __FILE__, __LINE__, \ - (std::string(MSG) + ": " + cudaGetErrorString(lice_err__)) \ - .c_str())); \ - } \ - } while (false) - -#define RETURN_IF_CUDA_ERROR(X, C, MSG) \ - do { \ - cudaError_t rice_err__ = (X); \ - if (rice_err__ != cudaSuccess) { \ - return TRITONSERVER_ErrorNew( \ - C, ((MSG) + ": " + cudaGetErrorString(rice_err__)).c_str()); \ - } \ - } while (false) -#endif // TRITON_ENABLE_GPU - -#define RESPOND_AND_SET_NULL_IF_ERROR(RESPONSE_PTR, X) \ - do { \ - TRITONSERVER_Error* rarie_err__ = (X); \ - if (rarie_err__ != nullptr) { \ - if (*RESPONSE_PTR != nullptr) { \ - LOG_IF_ERROR( \ - TRITONBACKEND_ResponseSend( \ - *RESPONSE_PTR, TRITONSERVER_RESPONSE_COMPLETE_FINAL, \ - rarie_err__), \ - "failed to send error response"); \ - *RESPONSE_PTR = nullptr; \ - } \ - TRITONSERVER_ErrorDelete(rarie_err__); \ - } \ - } while (false) - -#define RESPOND_ALL_AND_SET_NULL_IF_ERROR(RESPONSES, RESPONSES_COUNT, X) \ - do { \ - TRITONSERVER_Error* raasnie_err__ = (X); \ - if (raasnie_err__ != nullptr) { \ - for (size_t ridx = 0; ridx < RESPONSES_COUNT; ++ridx) { \ - if (RESPONSES[ridx] != nullptr) { \ - LOG_IF_ERROR( \ - TRITONBACKEND_ResponseSend( \ - RESPONSES[ridx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \ - raasnie_err__), \ - "failed to send error response"); \ - RESPONSES[ridx] = nullptr; \ - } \ - } \ - TRITONSERVER_ErrorDelete(raasnie_err__); \ - } \ - } while (false) - -#define RESPOND_ALL_AND_SET_TRUE_IF_ERROR(RESPONSES, RESPONSES_COUNT, BOOL, X) \ - do { \ - TRITONSERVER_Error* raasnie_err__ = (X); \ - if (raasnie_err__ != nullptr) { \ - BOOL = true; \ - for (size_t ridx = 0; ridx < RESPONSES_COUNT; ++ridx) { \ - if (RESPONSES[ridx] != nullptr) { \ - LOG_IF_ERROR( \ - TRITONBACKEND_ResponseSend( \ - RESPONSES[ridx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, \ - raasnie_err__), \ - "failed to send error response"); \ - RESPONSES[ridx] = nullptr; \ - } \ - } \ - TRITONSERVER_ErrorDelete(raasnie_err__); \ - } \ - } while (false) - -#ifdef TRITON_ENABLE_STATS -#define TIMESPEC_TO_NANOS(TS) ((TS).tv_sec * 1000000000 + (TS).tv_nsec) -#define SET_TIMESTAMP(TS_NS) \ - { \ - TS_NS = std::chrono::duration_cast( \ - std::chrono::steady_clock::now().time_since_epoch()) \ - .count(); \ - } -#define DECL_TIMESTAMP(TS_NS) \ - uint64_t TS_NS; \ - SET_TIMESTAMP(TS_NS); -#else -#define DECL_TIMESTAMP(TS_NS) -#define SET_TIMESTAMP(TS_NS) -#endif // TRITON_ENABLE_STATS - -#ifndef TRITON_ENABLE_GPU -using cudaStream_t = void*; -#endif // !TRITON_ENABLE_GPU - -/// Convenience deleter for TRITONBACKEND_ResponseFactory. 
-struct ResponseFactoryDeleter { - void operator()(TRITONBACKEND_ResponseFactory* f) - { - LOG_IF_ERROR( - TRITONBACKEND_ResponseFactoryDelete(f), - "failed deleting response factory"); - } -}; - -// A representation of the BatchInput message in model config -class BatchInput { - public: - enum class Kind { - BATCH_ELEMENT_COUNT, - BATCH_ACCUMULATED_ELEMENT_COUNT, - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO, - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE, - BATCH_ITEM_SHAPE, - BATCH_ITEM_SHAPE_FLATTEN - }; - static TRITONSERVER_Error* ParseFromModelConfig( - triton::common::TritonJson::Value& config, - std::vector* batch_inputs); - const std::vector& TargetNames() const { return target_names_; } - TRITONSERVER_DataType DataType() const { return data_type_; } - Kind BatchInputKind() const { return kind_; } - std::string BatchInputKindString() const { return kind_str_; } - const std::vector& SourceInputs() const - { - return source_inputs_; - } - - private: - TRITONSERVER_Error* Init(triton::common::TritonJson::Value& bi_config); - Kind kind_; - std::string kind_str_; - std::vector target_names_; - TRITONSERVER_DataType data_type_; - std::vector source_inputs_; -}; - -// A representation of the BatchOutput message in model config -class BatchOutput { - public: - enum class Kind { BATCH_SCATTER_WITH_INPUT_SHAPE }; - static TRITONSERVER_Error* ParseFromModelConfig( - triton::common::TritonJson::Value& config, - std::vector* batch_outputs); - const std::vector& TargetNames() const { return target_names_; } - TRITONSERVER_DataType DataType() const { return data_type_; } - const std::vector& OutputShape() const { return shape_; } - Kind BatchOutputKind() const { return kind_; } - const std::vector& SourceInputs() const - { - return source_inputs_; - } - - private: - Kind kind_; - std::vector target_names_; - TRITONSERVER_DataType data_type_; - std::vector shape_; - std::vector source_inputs_; -}; - -struct CopyParams { - CopyParams(void* dst, const void* src, const size_t byte_size) - : dst_(dst), src_(src), byte_size_(byte_size) - { - } - - void* dst_; - const void* src_; - const size_t byte_size_; -}; - -/// The value for a dimension in a shape that indicates that that -/// dimension can take on any size. -constexpr int WILDCARD_DIM = -1; - -constexpr char kTensorRTExecutionAccelerator[] = "tensorrt"; -constexpr char kOpenVINOExecutionAccelerator[] = "openvino"; -constexpr char kGPUIOExecutionAccelerator[] = "gpu_io"; -constexpr char kAutoMixedPrecisionExecutionAccelerator[] = - "auto_mixed_precision"; - -TRITONSERVER_MemoryType GetUsePinnedMemoryType( - TRITONSERVER_MemoryType ref_buffer_type); - -TRITONSERVER_Error* CommonErrorToTritonError(triton::common::Error error); - -TRITONSERVER_Error_Code StatusCodeToTritonCode( - triton::common::Error::Code error_code); - -/// Parse an array in a JSON object into the corresponding shape. The -/// array must be composed of integers. -/// -/// \param io The JSON object containing the member array. -/// \param name The name of the array member in the JSON object. -/// \param shape Returns the shape. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ParseShape( - common::TritonJson::Value& io, const std::string& name, - std::vector* shape); - -/// Return the string representation of a shape. -/// -/// \param dims The shape dimensions. -/// \param dims_count The number of dimensions. -/// \return The string representation. 
-std::string ShapeToString(const int64_t* dims, const size_t dims_count); - -/// Return the string representation of a shape. -/// -/// \param shape The shape as a vector of dimensions. -/// \return The string representation. -std::string ShapeToString(const std::vector& shape); - -/// Return the number of elements of a shape. -/// -/// \param dims The shape dimensions. -/// \param dims_count The number of dimensions. -/// \return The number of elements. -int64_t GetElementCount(const int64_t* dims, const size_t dims_count); - -/// Return the number of elements of a shape. -/// -/// \param shape The shape as a vector of dimensions. -/// \return The number of elements. -int64_t GetElementCount(const std::vector& shape); - -/// Get the size, in bytes, of a tensor based on datatype and -/// shape. -/// \param dtype The data-type. -/// \param dims The shape. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize( - const TRITONSERVER_DataType& dtype, const std::vector& dims); - -/// Get an input tensor's contents into a buffer. This overload expects -/// both 'buffer' and buffers of the input to be in CPU. -/// -/// \param request The inference request. -/// \param input_name The name of the input buffer. -/// \param buffer The buffer where the input tensor content is copied into. -/// \param buffer_byte_size Acts as both input and output. On input -/// gives the size of 'buffer', in bytes. The function will fail if -/// the buffer is not large enough to hold the input tensor -/// contents. Returns the size of the input tensor data returned in -/// 'buffer'. -/// \param host_policy_name The host policy name to look up the input buffer. -/// Default input buffer will be used if nullptr is provided. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ReadInputTensor( - TRITONBACKEND_Request* request, const std::string& input_name, char* buffer, - size_t* buffer_byte_size, const char* host_policy_name = nullptr); - -/// Get an input tensor's contents into a buffer. This overload of -/// 'ReadInputTensor' supports input buffers that can be in any memory. -/// -/// \param request The inference request. -/// \param input_name The name of the input buffer. -/// \param buffer The buffer where the input tensor content is copied into. -/// \param buffer_byte_size Acts as both input and output. On input -/// gives the size of 'buffer', in bytes. The function will fail if -/// the buffer is not large enough to hold the input tensor -/// contents. Returns the size of the input tensor data returned in -/// 'buffer'. -/// \param host_policy_name The host policy name to look up the input buffer. -/// Default input buffer will be used if nullptr is provided. -/// \param memory_type The memory type of the buffer provided. -/// \param memory_type_id The memory type id of the buffer provided. -/// \param cuda_stream specifies the stream to be associated with, and 0 can be -/// passed for default stream. -/// \param cuda_used returns whether a CUDA memory copy is initiated. If true, -/// the caller should synchronize on the given 'cuda_stream' to ensure data copy -/// is completed. -/// \param copy_on_stream whether the memory copies should be performed in cuda -/// host functions on the 'cuda_stream'. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONSERVER_Error* ReadInputTensor( - TRITONBACKEND_Request* request, const std::string& input_name, char* buffer, - size_t* buffer_byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, cudaStream_t cuda_stream, bool* cuda_used, - const char* host_policy_name = nullptr, const bool copy_on_stream = false); - -/// Validate that an input matches one of the allowed input names. -/// \param io The model input. -/// \param allowed The set of allowed input names. -/// \return The error status. A non-OK status indicates the input -/// is not valid. -TRITONSERVER_Error* CheckAllowedModelInput( - common::TritonJson::Value& io, const std::set& allowed); - -/// Validate that an output matches one of the allowed output names. -/// \param io The model output. -/// \param allowed The set of allowed output names. -/// \return The error status. A non-OK status indicates the output -/// is not valid. -TRITONSERVER_Error* CheckAllowedModelOutput( - common::TritonJson::Value& io, const std::set& allowed); - -/// Get the tensor name, false value, and true value for a boolean -/// sequence batcher control kind. If 'required' is true then must -/// find a tensor for the control. If 'required' is false, return -/// 'tensor_name' as empty-string if the control is not mapped to any -/// tensor. -/// -/// \param batcher The JSON object of the sequence batcher. -/// \param model_name The name of the model. -/// \param control_kind The kind of control tensor to look for. -/// \param required Whether the tensor must be specified. -/// \param tensor_name Returns the name of the tensor. -/// \param tensor_datatype Returns the data type of the tensor. -/// \param fp32_false_value Returns the float value for false if -/// the tensor type is FP32. -/// \param fp32_true_value Returns the float value for true if -/// the tensor type is FP32. -/// \param int32_false_value Returns the int value for false if -/// the tensor type is INT32. -/// \param int32_true_value Returns the int value for true if -/// the tensor type is INT32. -/// \param bool_false_value Returns the bool value for false if -/// the tensor type is BOOL. -/// \param bool_true_value Returns the bool value for true if -/// the tensor type is BOOL. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* GetBooleanSequenceControlProperties( - common::TritonJson::Value& batcher, const std::string& model_name, - const std::string& control_kind, const bool required, - std::string* tensor_name, std::string* tensor_datatype, - float* fp32_false_value, float* fp32_true_value, int32_t* int32_false_value, - int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value); - -/// Get the tensor name and datatype for a non-boolean sequence -/// batcher control kind. If 'required' is true then must find a -/// tensor for the control. If 'required' is false, return -/// 'tensor_name' as empty-string if the control is not mapped to any -/// tensor. 'tensor_datatype' returns the required datatype for the -/// control. -/// -/// \param batcher The JSON object of the sequence batcher. -/// \param model_name The name of the model. -/// \param control_kind The kind of control tensor to look for. -/// \param required Whether the tensor must be specified. -/// \param tensor_name Returns the name of the tensor. -/// \param tensor_datatype Returns the data type of the tensor. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONSERVER_Error* GetTypedSequenceControlProperties( - common::TritonJson::Value& batcher, const std::string& model_name, - const std::string& control_kind, const bool required, - std::string* tensor_name, std::string* tensor_datatype); - -/// Create and send an error response for a set of requests. This -/// function takes ownership of 'response_err' and so the caller must -/// not access or delete it after this call returns. -/// -/// \param requests The requests. -/// \param request_count The number of 'requests'. -/// \param response_err The error to send to each request. -/// \param release_request If true, the requests will be released after -/// sending the error responses and the request pointers are set to -/// nullptr. -void RequestsRespondWithError( - TRITONBACKEND_Request** requests, const uint32_t request_count, - TRITONSERVER_Error* response_err, const bool release_request = true); - -/// Send an error response for a set of responses. This function takes -/// ownership of 'response_err' and so the caller must not access or -/// delete it after this call returns. -/// -/// \param responses The responses. -/// \param response_count The number of 'responses'. -/// \param response_err The error to send. -void SendErrorForResponses( - std::vector* responses, - const uint32_t response_count, TRITONSERVER_Error* response_err); - -/// Copy buffer from 'src' to 'dst' for given 'byte_size'. The buffer location -/// is identified by the memory type and id, and the corresponding copy will be -/// initiated. -/// \param msg The message to be prepended in error message. -/// \param src_memory_type The memory type of the source buffer. -/// \param src_memory_type_id The memory type id of the source buffer. -/// \param dst_memory_type The memory type of the destination buffer. -/// \param dst_memory_type_id The memory type id of the destination buffer. -/// \param byte_size The byte size of the source buffer. -/// \param src The pointer to the source buffer. -/// \param dst The pointer to the destination buffer. -/// \param cuda_stream specifies the stream to be associated with, and 0 can be -/// passed for default stream. -/// \param cuda_used returns whether a CUDA memory copy is initiated. If true, -/// the caller should synchronize on the given 'cuda_stream' to ensure data copy -/// is completed. -/// \param copy_on_stream whether the memory copies should be performed in cuda -/// host functions on the 'cuda_stream'. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* CopyBuffer( - const std::string& msg, const TRITONSERVER_MemoryType src_memory_type, - const int64_t src_memory_type_id, - const TRITONSERVER_MemoryType dst_memory_type, - const int64_t dst_memory_type_id, const size_t byte_size, const void* src, - void* dst, cudaStream_t cuda_stream, bool* cuda_used, - const bool copy_on_stream = false); - -/// Does a file or directory exist? -/// \param path The path to check for existance. -/// \param exists Returns true if file/dir exists -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* FileExists(const std::string& path, bool* exists); - -/// Read a text file into a string. -/// \param path The path of the file. -/// \param contents Returns the contents of the file. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ReadTextFile( - const std::string& path, std::string* contents); - -/// Is a path a directory? -/// \param path The path to check. 
-/// \param is_dir Returns true if path represents a directory -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* IsDirectory(const std::string& path, bool* is_dir); - -/// Join path segments into a longer path -/// \param segments The path segments. -/// \return the path formed by joining the segments. -std::string JoinPath(std::initializer_list segments); - -/// Returns the content in the model version path and the path to the content as -/// key-value pair. -/// \param model_repository_path The path to the model repository. -/// \param version The version of the model. -/// \param ignore_directories Whether the directories will be ignored. -/// \param ignore_files Whether the files will be ignored. -/// \param model_paths Returns the content in the model version path and -/// the path to the content. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ModelPaths( - const std::string& model_repository_path, uint64_t version, - const bool ignore_directories, const bool ignore_files, - std::unordered_map* model_paths); - -/// Create a CUDA stream appropriate for GPU<->CPU data transfer -/// operations for a given GPU device. The caller takes ownership of -/// the stream. 'stream' returns nullptr if GPU support is disabled. -/// -/// \param device_id The ID of the GPU. -/// \param priority The stream priority. Use 0 for normal priority. -/// \param stream Returns the created stream. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* CreateCudaStream( - const int device_id, const int cuda_stream_priority, cudaStream_t* stream); - -/// Parse the string as long long integer. -/// -/// \param value The string. -/// \param parse_value The long long integral value of the string. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ParseLongLongValue( - const std::string& value, int64_t* parsed_value); - -/// Parse the string as unsigned long long integer. -/// -/// \param value The string. -/// \param parse_value The unsigned long long integral value of the string. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ParseUnsignedLongLongValue( - const std::string& value, uint64_t* parsed_value); - -/// Parse the string as boolean. -/// -/// \param value The string. -/// \param parse_value The boolean value of the string. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ParseBoolValue( - const std::string& value, bool* parsed_value); - -/// Parse the string as integer. -/// -/// \param value The string. -/// \param parse_value The integral value of the string. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ParseIntValue(const std::string& value, int* parsed_value); - -/// Parse the string as double. -/// -/// \param value The string. -/// \param parse_value The double value of the string. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* ParseDoubleValue( - const std::string& value, double* parsed_value); - -/// Return the value of the specified key in a JSON object. -/// -/// \param params The JSON object containing the key-value mapping. -/// \param key The key to look up the value in the JSON object. -/// \param value Returns the value. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONSERVER_Error* GetParameterValue( - triton::common::TritonJson::Value& params, const std::string& key, - std::string* value); - -/// Return the Triton server data type of the data type string specified -/// in model config JSON. -/// -/// \param data_type_str The string representation of the data type. -/// \return the Triton server data type. -TRITONSERVER_DataType ModelConfigDataTypeToTritonServerDataType( - const std::string& data_type_str); - -/// Try to parse the requested parameter. -/// -/// \param params The param in model config -/// \param mkey Key in the model config. -/// \param value The parsed string value. -/// \param default_value Default value to use when key is not found. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - std::string* value, const std::string& default_value); - -/// Try to parse the requested parameter. -/// -/// \param params The param in model config -/// \param mkey Key in the model config. -/// \param value The parsed int value. -/// \param default_value Default value to use when key is not found. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - int* value, const int& default_value); - -/// Try to parse the requested parameter. -/// -/// \param params The param in model config -/// \param mkey Key in the model config. -/// \param value The parsed bool value. -/// \param default_value Default value to use when key is not found. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - bool* value, const bool& default_value); - -/// Try to parse the requested parameter. -/// -/// \param params The param in model config -/// \param mkey Key in the model config. -/// \param value The parsed uint64 value. -/// \param default_value Default value to use when key is not found. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - uint64_t* value, const uint64_t& default_value); - -/// Get a string representation of a tensor buffer. -/// -/// \param str Returns the string. -/// \param buffer The base pointer to the tensor buffer. -/// \param buffer_byte_size The size of the buffer in bytes. -/// \param datatype The type of the tensor -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_Error* BufferAsTypedString( - std::string& str, const char* buffer, size_t buffer_byte_size, - TRITONSERVER_DataType datatype); - -/// Get the ID of the request as a string formatted for logging. -/// -/// \param request Request of which to get the ID. -/// \return a formatted string for logging the request ID. 
-std::string GetRequestId(TRITONBACKEND_Request* request); - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h b/3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h deleted file mode 100644 index 44a7b1bc625db649f44f21523a02c79474a54436..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/include/triton/backend/backend_input_collector.h +++ /dev/null @@ -1,301 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include "triton/backend/backend_common.h" -#include "triton/backend/backend_memory.h" -#include "triton/common/async_work_queue.h" -#include "triton/common/sync_queue.h" -#include "triton/core/tritonbackend.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace backend { - -#ifndef TRITON_ENABLE_GPU -using cudaStream_t = void*; -using cudaEvent_t = void*; -#endif // !TRITON_ENABLE_GPU - -// -// BackendInputCollector -// -class BackendInputCollector { - public: - // The caller can optionally provide 'event' for internal synchronization - // instead of using 'stream'. 
If 'host_policy_name' is provided, it must be - // valid for the lifetime of the collector - explicit BackendInputCollector( - TRITONBACKEND_Request** requests, const uint32_t request_count, - std::vector* responses, - TRITONBACKEND_MemoryManager* memory_manager, const bool pinned_enabled, - cudaStream_t stream, cudaEvent_t event = nullptr, - cudaEvent_t buffer_ready_event = nullptr, - const size_t kernel_buffer_threshold = 0, - const char* host_policy_name = nullptr, const bool copy_on_stream = false, - const bool coalesce_request_input = false) - : need_sync_(false), requests_(requests), request_count_(request_count), - responses_(responses), memory_manager_(memory_manager), - pinned_enabled_(pinned_enabled), - use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1), - stream_(stream), event_(event), buffer_ready_event_(buffer_ready_event), - kernel_buffer_threshold_(kernel_buffer_threshold), - pending_pinned_byte_size_(0), pending_pinned_offset_(0), - pending_copy_kernel_buffer_byte_size_(0), - pending_copy_kernel_buffer_offset_(0), - pending_copy_kernel_input_buffer_counts_(0), async_task_count_(0), - host_policy_cstr_(host_policy_name), copy_on_stream_(copy_on_stream), - coalesce_request_input_(coalesce_request_input) - { - } - - ~BackendInputCollector() = default; - - // Process all requests for a named input tensor and return the - // concatenated values of those requests in a single contiguous - // buffer. This overload of the function can avoid data copy if the - // tensor values are already contiguous and the caller doesn't - // provide a destination 'buffer'. - // - // 'buffer' is used to determine whether the input should be placed at the - // 'buffer' provided by the caller. If 'buffer' == nullptr, the returned - // buffer will be managed by the BackendInputCollector object and - // has the same lifecycle as the BackendInputCollector object. - // 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr. - // 'allowed_input_types' is the ordered list of the memory type and id pairs - // that the returned buffer can be. It must only contain the memory type - // and id of 'buffer' if 'buffer' is not nullptr. - // 'dst_buffer' returns the contiguous buffer of the input tensor. - // 'dst_buffer_byte_size' the byte size of 'dst_buffer'. - // 'dst_memory_type' returns the memory type of 'dst_buffer'. - // 'dst_memory_type_id' returns the memory type id of 'dst_buffer'. - TRITONSERVER_Error* ProcessTensor( - const char* input_name, char* buffer, const size_t buffer_byte_size, - const std::vector>& - allowed_input_types, - const char** dst_buffer, size_t* dst_buffer_byte_size, - TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id); - - // Process all requests for a named input tensor and return the - // concatenated values of those requests in a single contiguous - // 'buffer'. - // - // 'buffer' The buffer to hold the concatenates tensor value. Must - // be large enough to hold all tensor value. - // 'buffer_byte_size' is the byte size of 'buffer'. - // 'dst_memory_type' The memory type of 'buffer'. - // 'dst_memory_type_id' The memory type id of 'buffer'. - void ProcessTensor( - const char* input_name, char* buffer, const size_t buffer_byte_size, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id); - - // Process the batch input and return its shape. Returning error indicates - // that the batch input can't be formed properly and the caller should abort - // the whole batch. 
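Taken together, the constructor and the second ProcessTensor overload above give the common "gather everything into one buffer" pattern: build a collector per batch, gather each named input, then Finalize. The sketch below assumes a CPU-only destination buffer and an invented input name.

```cpp
#include <vector>

#include "triton/backend/backend_input_collector.h"

void
GatherInput(
    TRITONBACKEND_Request** requests, uint32_t request_count,
    std::vector<TRITONBACKEND_Response*>* responses,
    TRITONBACKEND_MemoryManager* memory_manager, char* dst, size_t dst_size)
{
  triton::backend::BackendInputCollector collector(
      requests, request_count, responses, memory_manager,
      false /* pinned_enabled */, nullptr /* stream */);

  // Second overload: concatenate the "INPUT0" tensor from every request
  // directly into the caller-provided CPU buffer.
  collector.ProcessTensor(
      "INPUT0", dst, dst_size, TRITONSERVER_MEMORY_CPU,
      0 /* memory_type_id */);

  // Finalize() returns true only if an asynchronous CUDA copy was issued;
  // on a CPU-only path there is nothing further to synchronize.
  const bool cuda_copy = collector.Finalize();
  (void)cuda_copy;
}
```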
- TRITONSERVER_Error* BatchInputShape( - const BatchInput& batch_input, std::vector* shape); - - // Process the batch input and derive its value into 'buffer'. Returning - // error indicates that the batch input can't be formed properly and - // the caller should abort the whole batch. - // 'buffer' is used to determine whether the input should be placed at the - // 'buffer' provided by the caller. If 'buffer' == nullptr, the returned - // buffer will be managed by the BackendInputCollector object and - // has the same lifecycle as the BackendInputCollector object. - // 'buffer_byte_size' is the byte size of 'buffer' if it is not nullptr. - // 'allowed_input_types' is the ordered list of the memory type and id pairs - // that the returned buffer can be. It must only contain the memory type - // and id of 'buffer' if it is not nullptr. - // 'dst_buffer' returns the contiguous buffer of the input tensor. - // 'dst_memory_type' returns the memory type of 'dst_buffer'. - // 'dst_memory_type_id' returns the memory type id of 'dst_buffer'. - TRITONSERVER_Error* ProcessBatchInput( - const BatchInput& batch_input, char* buffer, - const size_t buffer_byte_size, - const std::vector>& - allowed_input_types, - const char** dst_buffer, size_t* dst_buffer_byte_size, - TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id); - - // Finalize processing of all requests for all input tensors. Return - // true if cudaMemcpyAsync is called, and the caller should call - // cudaStreamSynchronize (or cudaEventSynchronize on 'event') before - // using the data. - bool Finalize(); - - private: - struct ContiguousBuffer { - ContiguousBuffer() : start_request_idx_(0), end_request_idx_(0) {} - MemoryDesc memory_desc_; - size_t start_request_idx_; - size_t end_request_idx_; - }; - - class InputIterator { - public: - InputIterator( - TRITONBACKEND_Request** requests, const uint32_t request_count, - std::vector* responses, const char* input_name, - const char* host_policy_name, const bool coalesce_request_input); - - // Return false if iterator reaches the end of inputs, 'input' is not set. - bool GetNextContiguousInput(ContiguousBuffer* input); - - private: - TRITONBACKEND_Request** requests_; - const uint32_t request_count_; - std::vector* responses_; - const char* input_name_; - const char* host_policy_; - const bool coalesce_request_input_; - - TRITONBACKEND_Input* curr_input_; - size_t curr_request_idx_; - size_t curr_buffer_idx_; - uint32_t curr_buffer_cnt_; - bool reach_end_; - }; - - // Return whether the entire input is in a contiguous buffer. If returns true, - // the properties of the contiguous input buffer will also be returned. - // Otherwise, only 'buffer_byte_size' will be set and return the total byte - // size of the input. 
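Finalize's return value is the synchronization contract for the whole collector: only when it reports that an asynchronous CUDA copy was issued does the caller need to wait on the stream (or on the optional event). A short sketch of that check, assuming the collector was built with a CUDA stream:

```cpp
#include "triton/backend/backend_input_collector.h"

void
GatherAndSync(
    triton::backend::BackendInputCollector& collector, cudaStream_t stream)
{
  // ... ProcessTensor()/ProcessBatchInput() calls were issued above ...

  if (collector.Finalize()) {
#ifdef TRITON_ENABLE_GPU
    // At least one cudaMemcpyAsync was enqueued on 'stream'; the gathered
    // buffers must not be read until the stream has drained.
    cudaStreamSynchronize(stream);
#endif  // TRITON_ENABLE_GPU
  }
}
```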
- bool GetInputBufferIfContiguous( - const char* input_name, const char** buffer, size_t* buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id); - bool FlushPendingPinned( - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id); - bool FlushPendingCopyKernel( - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id); - TRITONSERVER_Error* LaunchCopyKernel( - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id); - bool SetInputTensor( - const char* input_name, const ContiguousBuffer& input, - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id, const size_t tensor_buffer_offset, - const TRITONSERVER_MemoryType use_pinned_memory_type, - const bool use_kernel, const bool wait_buffer); - template - TRITONSERVER_Error* SetElementCount( - const std::string& source_input, char* buffer, - const size_t buffer_byte_size); - template - TRITONSERVER_Error* SetAccumulatedElementCount( - const std::string& source_input, char* buffer, - const size_t buffer_byte_size); - template - TRITONSERVER_Error* SetBatchItemShape( - const std::string& source_input, char* buffer, - const size_t buffer_byte_size); - - bool need_sync_; - TRITONBACKEND_Request** requests_; - const uint32_t request_count_; - std::vector* responses_; - TRITONBACKEND_MemoryManager* memory_manager_; - const bool pinned_enabled_; - const bool use_async_cpu_copy_; - cudaStream_t stream_; - cudaEvent_t event_; - cudaEvent_t buffer_ready_event_; - const size_t kernel_buffer_threshold_; - - size_t pending_pinned_byte_size_; - size_t pending_pinned_offset_; - std::list pending_pinned_input_buffers_; - - // managed memories that need to live over the lifetime of this - // BackendInputCollector object. - std::list> in_use_memories_; - - size_t pending_copy_kernel_buffer_byte_size_; - size_t pending_copy_kernel_buffer_offset_; - size_t pending_copy_kernel_input_buffer_counts_; - std::list pending_copy_kernel_input_buffers_; - std::vector>> input_ptr_buffer_host_; - std::vector>> byte_size_buffer_host_; - std::vector>> - byte_size_offset_buffer_host_; - - // Pinned memory buffers and the corresponding request_inputs where - // the final copy to the tensor is deferred until Finalize() after - // waiting for all in-flight copies. 
- struct DeferredPinned { - DeferredPinned( - char* pinned_memory, const size_t pinned_memory_size, - char* tensor_buffer, const size_t tensor_buffer_offset, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_id, - std::list&& request_buffers, - std::vector* responses) - : finalized_(false), pinned_memory_(pinned_memory), - pinned_memory_size_(pinned_memory_size), - tensor_buffer_(tensor_buffer), - tensor_buffer_offset_(tensor_buffer_offset), - tensor_memory_type_(tensor_memory_type), - tensor_memory_id_(tensor_memory_id), - requests_(std::move(request_buffers)), responses_(responses) - { - } - - bool Finalize(cudaStream_t stream); - bool finalized_; - // Holding reference to the pinned memory buffer, which is managed - // by BackendInputCollector as 'pinned_memory' - char* pinned_memory_; - const size_t pinned_memory_size_; - char* tensor_buffer_; - const size_t tensor_buffer_offset_; - const TRITONSERVER_MemoryType tensor_memory_type_; - const int64_t tensor_memory_id_; - std::list requests_; - std::vector* responses_; - }; - - std::list deferred_pinned_; - // FIXME use future to maintain an issue-order queue to drop task count - triton::common::SyncQueue completion_queue_; - size_t async_task_count_; - - const char* host_policy_cstr_; - const bool copy_on_stream_; - const bool coalesce_request_input_; -}; - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/include/triton/backend/backend_memory.h b/3rdparty/backend-r22.12/include/triton/backend/backend_memory.h deleted file mode 100644 index 819ca3743a5b2f929e4b8f429e622c1e86a47232..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/include/triton/backend/backend_memory.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include -#include -#include "triton/core/tritonbackend.h" -#include "triton/core/tritonserver.h" - -namespace triton { namespace backend { - -// Colletion of common properties that describes a buffer in Triton -struct MemoryDesc { - MemoryDesc() - : buffer_(nullptr), byte_size_(0), memory_type_(TRITONSERVER_MEMORY_CPU), - memory_type_id_(0) - { - } - MemoryDesc( - const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) - : buffer_(buffer), byte_size_(byte_size), memory_type_(memory_type), - memory_type_id_(memory_type_id) - { - } - const char* buffer_; - size_t byte_size_; - TRITONSERVER_MemoryType memory_type_; - int64_t memory_type_id_; -}; - -// -// BackendMemory -// -// Utility class for allocating and deallocating memory using both -// TRITONBACKEND_MemoryManager and direct GPU and CPU malloc/free. -// -class BackendMemory { - public: - enum class AllocationType { CPU, CPU_PINNED, GPU, CPU_PINNED_POOL, GPU_POOL }; - - // Allocate a contiguous block of 'alloc_type' memory. 'mem' - // returns the pointer to the allocated memory. - // - // CPU, CPU_PINNED_POOL and GPU_POOL are allocated using - // TRITONBACKEND_MemoryManagerAllocate. Note that CPU_PINNED and GPU - // allocations can be much slower than the POOL variants. - // - // Two error codes have specific interpretations for this function: - // - // TRITONSERVER_ERROR_UNSUPPORTED: Indicates that function is - // incapable of allocating the requested memory type and memory - // type ID. Requests for the memory type and ID will always fail - // no matter 'byte_size' of the request. - // - // TRITONSERVER_ERROR_UNAVAILABLE: Indicates that function can - // allocate the memory type and ID but that currently it cannot - // allocate a contiguous block of memory of the requested - // 'byte_size'. - static TRITONSERVER_Error* Create( - TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type, - const int64_t memory_type_id, const size_t byte_size, - BackendMemory** mem); - - // Allocate a contiguous block of memory by attempting the - // allocation using 'alloc_types' in order until one is successful. - // See BackendMemory::Create() above for details. - static TRITONSERVER_Error* Create( - TRITONBACKEND_MemoryManager* manager, - const std::vector& alloc_types, - const int64_t memory_type_id, const size_t byte_size, - BackendMemory** mem); - - // Creates a BackendMemory object from a pre-allocated buffer. The buffer - // is not owned by the object created with this function. Hence, for - // proper operation, the lifetime of the buffer should atleast extend till - // the corresponding BackendMemory. 
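The vector overload of Create above is what makes the documented fallback behaviour convenient: the allocation types are tried in order, so a backend can prefer the pooled pinned allocator and quietly degrade to plain CPU memory. A sketch of that call, with an invented helper name:

```cpp
#include <memory>

#include "triton/backend/backend_common.h"
#include "triton/backend/backend_memory.h"

TRITONSERVER_Error*
AllocateScratch(
    TRITONBACKEND_MemoryManager* manager, size_t byte_size,
    std::unique_ptr<triton::backend::BackendMemory>* scratch)
{
  using triton::backend::BackendMemory;

  BackendMemory* mem = nullptr;
  RETURN_IF_ERROR(BackendMemory::Create(
      manager,
      {BackendMemory::AllocationType::CPU_PINNED_POOL,
       BackendMemory::AllocationType::CPU},
      0 /* memory_type_id */, byte_size, &mem));

  // The BackendMemory object owns the block it allocated and releases it in
  // its destructor; keep it alive for as long as the buffer is in use.
  scratch->reset(mem);
  return nullptr;  // success
}
```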
- static TRITONSERVER_Error* Create( - TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type, - const int64_t memory_type_id, void* buffer, const size_t byte_size, - BackendMemory** mem); - - ~BackendMemory(); - - AllocationType AllocType() const { return alloctype_; } - int64_t MemoryTypeId() const { return memtype_id_; } - char* MemoryPtr() { return buffer_; } - size_t ByteSize() const { return byte_size_; } - TRITONSERVER_MemoryType MemoryType() const - { - return AllocTypeToMemoryType(alloctype_); - } - - static TRITONSERVER_MemoryType AllocTypeToMemoryType(const AllocationType a); - static const char* AllocTypeString(const AllocationType a); - - private: - BackendMemory( - TRITONBACKEND_MemoryManager* manager, const AllocationType alloctype, - const int64_t memtype_id, char* buffer, const size_t byte_size, - const bool owns_buffer = true) - : manager_(manager), alloctype_(alloctype), memtype_id_(memtype_id), - buffer_(buffer), byte_size_(byte_size), owns_buffer_(owns_buffer) - { - } - - TRITONBACKEND_MemoryManager* manager_; - AllocationType alloctype_; - int64_t memtype_id_; - char* buffer_; - size_t byte_size_; - bool owns_buffer_; -}; - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/include/triton/backend/backend_model.h b/3rdparty/backend-r22.12/include/triton/backend/backend_model.h deleted file mode 100644 index 3179c6e8e656c8c9f618d297fdbd6a61b8c1fac0..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/include/triton/backend/backend_model.h +++ /dev/null @@ -1,146 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include "triton/backend/backend_common.h" -#include "triton/core/tritonbackend.h" -#include "triton/core/tritonserver.h" - -namespace triton { namespace backend { - -// -// BackendModel -// -// Common functionality for a backend model. This class is provided as -// a convenience; backends are not required to use this class. 
-// -class BackendModel { - public: - BackendModel( - TRITONBACKEND_Model* triton_model, const bool allow_optional = false); - virtual ~BackendModel() = default; - - // Get the handle to the TRITONBACKEND server hosting this model. - TRITONSERVER_Server* TritonServer() { return triton_server_; } - - // Get the handle to the memory manager for this model. - TRITONBACKEND_MemoryManager* TritonMemoryManager() - { - return triton_memory_manager_; - } - - // Get the handle to the TRITONBACKEND model. - TRITONBACKEND_Model* TritonModel() { return triton_model_; } - - // Get the name and version of the model. - const std::string& Name() const { return name_; } - uint64_t Version() const { return version_; } - const std::string& RepositoryPath() const { return repository_path_; } - - // The model configuration. - common::TritonJson::Value& ModelConfig() { return model_config_; } - - // Sets the updated model configuration to the core. - TRITONSERVER_Error* SetModelConfig(); - - // Parses information out of the model configuration. - TRITONSERVER_Error* ParseModelConfig(); - - // Maximum batch size supported by the model. A value of 0 - // indicates that the model does not support batching. - int MaxBatchSize() const { return max_batch_size_; } - - // Set the max batch size for the model. When a backend - // auto-completes a configuration it may set or change the maximum - // batch size. - void SetMaxBatchSize(const int b) { max_batch_size_ = b; } - - // Does this model support batching in the first dimension? - TRITONSERVER_Error* SupportsFirstDimBatching(bool* supports); - - // Use indirect pinned memory buffer when copying an input or output - // tensor to/from the model. - bool EnablePinnedInput() const { return enable_pinned_input_; } - bool EnablePinnedOutput() const { return enable_pinned_output_; } - - const std::vector& BatchInputs() const { return batch_inputs_; } - const std::vector& BatchOutputs() const - { - return batch_outputs_; - } - const BatchOutput* FindBatchOutput(const std::string& output_name) const; - bool IsInputRagged(const std::string& input_name) const - { - return (ragged_inputs_.find(input_name) != ragged_inputs_.end()); - } - bool IsInputOptional(const std::string& input_name) const - { - return (optional_inputs_.find(input_name) != optional_inputs_.end()); - } - - protected: - TRITONSERVER_Server* triton_server_; - TRITONBACKEND_MemoryManager* triton_memory_manager_; - TRITONBACKEND_Model* triton_model_; - std::string name_; - uint64_t version_; - std::string repository_path_; - bool allow_optional_; - - common::TritonJson::Value model_config_; - int max_batch_size_; - bool enable_pinned_input_; - bool enable_pinned_output_; - std::vector batch_inputs_; - std::vector batch_outputs_; - std::map batch_output_map_; - std::set ragged_inputs_; - std::set optional_inputs_; -}; - -// -// BackendModelException -// -// Exception thrown if error occurs while constructing an -// BackendModel. 
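Backends normally derive a model-state class from BackendModel so that the config parsing above happens once per model and the accessors stay available to every instance. The `ModelState` wrapper below is illustrative, not part of the header:

```cpp
#include "triton/backend/backend_model.h"

class ModelState : public triton::backend::BackendModel {
 public:
  explicit ModelState(TRITONBACKEND_Model* triton_model)
      : BackendModel(triton_model)
  {
    // max_batch_size and the pinned-memory flags were already parsed by the
    // base class, so derived state can just query them.
    supports_batching_ = (MaxBatchSize() > 0);
  }

  bool SupportsBatching() const { return supports_batching_; }

 private:
  bool supports_batching_ = false;
};
```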
-// -struct BackendModelException { - BackendModelException(TRITONSERVER_Error* err) : err_(err) {} - TRITONSERVER_Error* err_; -}; - -#define THROW_IF_BACKEND_MODEL_ERROR(X) \ - do { \ - TRITONSERVER_Error* tie_err__ = (X); \ - if (tie_err__ != nullptr) { \ - throw triton::backend::BackendModelException(tie_err__); \ - } \ - } while (false) - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h b/3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h deleted file mode 100644 index c4deeea09d760c5a3438532469eabcc570fdb004..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/include/triton/backend/backend_model_instance.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "triton/core/tritonbackend.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace backend { - -#ifndef TRITON_ENABLE_GPU -using cudaStream_t = void*; -#endif // !TRITON_ENABLE_GPU - -class BackendModel; - -// -// BackendModelInstance -// -// Common functionality for a backend model instance. This class is -// provided as a convenience; backends are not required to use this -// class. -// -class BackendModelInstance { - public: - BackendModelInstance( - BackendModel* backend_model, - TRITONBACKEND_ModelInstance* triton_model_instance); - virtual ~BackendModelInstance(); - - // Get the name, kind and device ID of the instance. - const std::string& Name() const { return name_; } - TRITONSERVER_InstanceGroupKind Kind() const { return kind_; } - int32_t DeviceId() const { return device_id_; } - - // Get the handle to the TRITONBACKEND model instance. - TRITONBACKEND_ModelInstance* TritonModelInstance() - { - return triton_model_instance_; - } - - // Get the BackendModel representing the model that corresponds to - // this instance. 
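BackendModelException and THROW_IF_BACKEND_MODEL_ERROR exist so that constructors, which cannot return a TRITONSERVER_Error*, can still surface failures; the backend entry point then converts the exception back into an error for the core. A sketch of that pattern, reusing the illustrative ModelState from above and the core's TRITONBACKEND_ModelSetState call:

```cpp
#include "triton/backend/backend_model.h"

// ModelState is the illustrative BackendModel subclass sketched earlier.
extern "C" TRITONSERVER_Error*
TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
{
  try {
    // Helpers called inside the constructor may use
    // THROW_IF_BACKEND_MODEL_ERROR(...) instead of returning errors.
    ModelState* state = new ModelState(model);
    RETURN_IF_ERROR(
        TRITONBACKEND_ModelSetState(model, reinterpret_cast<void*>(state)));
  }
  catch (const triton::backend::BackendModelException& ex) {
    return ex.err_;
  }

  return nullptr;  // success
}
```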
- BackendModel* Model() const { return backend_model_; } - - // The model configuration 'default_model_filename' value, or the - // value in model configuration 'cc_model_filenames' for the GPU - // targeted by this instance. If neither are specified in the model - // configuration, the return empty string. - const std::string& ArtifactFilename() const { return artifact_filename_; } - - // Returns the stream associated with this instance that can be used - // for GPU<->CPU memory transfers. Returns nullptr if GPU support is - // disabled or if this instance is not executing on a GPU. - cudaStream_t CudaStream() { return stream_; } - - const std::string& HostPolicyName() const { return host_policy_name_; } - - protected: - BackendModel* backend_model_; - TRITONBACKEND_ModelInstance* triton_model_instance_; - - std::string name_; - TRITONSERVER_InstanceGroupKind kind_; - int32_t device_id_; - - std::string artifact_filename_; - cudaStream_t stream_; - - std::string host_policy_name_; -}; - -// -// BackendModelInstanceException -// -// Exception thrown if error occurs while constructing an -// BackendModelInstance. -// -struct BackendModelInstanceException { - BackendModelInstanceException(TRITONSERVER_Error* err) : err_(err) {} - TRITONSERVER_Error* err_; -}; - -#define THROW_IF_BACKEND_INSTANCE_ERROR(X) \ - do { \ - TRITONSERVER_Error* tie_err__ = (X); \ - if (tie_err__ != nullptr) { \ - throw triton::backend::BackendModelInstanceException(tie_err__); \ - } \ - } while (false) - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h b/3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h deleted file mode 100644 index 611e103c0a9e4dd6f526e40eb746f6b7a9b6a3d1..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/include/triton/backend/backend_output_responder.h +++ /dev/null @@ -1,195 +0,0 @@ -// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
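The per-instance accessors (ArtifactFilename, CudaStream, HostPolicyName) are what an execute path usually forwards into the collector and responder utilities. A small, illustrative instance wrapper:

```cpp
#include "triton/backend/backend_model.h"
#include "triton/backend/backend_model_instance.h"

// 'InstanceState' is illustrative, not part of the header.
class InstanceState : public triton::backend::BackendModelInstance {
 public:
  InstanceState(
      triton::backend::BackendModel* model_state,
      TRITONBACKEND_ModelInstance* instance)
      : BackendModelInstance(model_state, instance)
  {
  }

  // CudaStream() is nullptr for CPU instances; the collector and responder
  // accept that as "no stream", so it can be forwarded unconditionally.
  cudaStream_t Stream() { return CudaStream(); }
};
```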
-#pragma once - -#include -#include -#include -#include "triton/backend/backend_common.h" -#include "triton/common/async_work_queue.h" -#include "triton/core/tritonbackend.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace backend { - -#ifndef TRITON_ENABLE_GPU -using cudaStream_t = void*; -using cudaEvent_t = void*; -#endif // !TRITON_ENABLE_GPU - -// -// BackendOutputResponder -// -class BackendOutputResponder { - public: - // The caller can optionally provide 'event' for internal synchronization - // instead of using 'stream'. - explicit BackendOutputResponder( - TRITONBACKEND_Request** requests, const uint32_t request_count, - std::vector* responses, - TRITONBACKEND_MemoryManager* memory_manager, - const bool first_dim_batching, const bool pinned_enabled, - cudaStream_t stream, cudaEvent_t event = nullptr, - bool copy_on_stream = false) - : need_sync_(false), requests_(requests), request_count_(request_count), - responses_(responses), memory_manager_(memory_manager), - first_dim_batching_(first_dim_batching), - pinned_enabled_(pinned_enabled), - use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1), - stream_(stream), event_(event), pending_pinned_byte_size_(0), - copy_on_stream_(copy_on_stream) - { - } - - // Legacy constructor for backwards compatibility. The above - // constructor should be used for all new cases. The responder needs - // to know if the model is batching along the first dimension. With - // this constructor we derive that information from the - // max_batch_size value instead of having it provided directly as in - // the above constructor. - explicit BackendOutputResponder( - TRITONBACKEND_Request** requests, const uint32_t request_count, - std::vector* responses, const int max_batch_size, - TRITONBACKEND_MemoryManager* memory_manager, const bool pinned_enabled, - cudaStream_t stream, cudaEvent_t event = nullptr, - bool copy_on_stream = false) - : need_sync_(false), requests_(requests), request_count_(request_count), - responses_(responses), memory_manager_(memory_manager), - first_dim_batching_(max_batch_size >= 1), - pinned_enabled_(pinned_enabled), - use_async_cpu_copy_(triton::common::AsyncWorkQueue::WorkerCount() > 1), - stream_(stream), event_(event), pending_pinned_byte_size_(0), - copy_on_stream_(copy_on_stream) - { - } - - ~BackendOutputResponder(); - - // Process all responses for a named output tensor. - // 'batchn_shape' may be modified by the call. - void ProcessTensor( - const std::string& name, const TRITONSERVER_DataType datatype, - std::vector& batchn_shape, const char* buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id); - - // Process all responses for a named state tensor. Returns a vector of - // TRITONBACKEND_State objects that the backend can use to update the state. - // If TRITONBACKEND_StateUpdate is not called on the vector elements, the - // state will not be updated. - // 'batchn_shape' may be modified by the call. - std::vector ProcessStateTensor( - const std::string& name, const TRITONSERVER_DataType datatype, - std::vector& batchn_shape, const char* buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id); - - // Process all responses for a batch output and derive its value from - // 'buffer'. 
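The responder mirrors the input collector on the output side: one ProcessTensor call per named output scatters a contiguous buffer back into the per-request responses, and Finalize tells the caller whether a CUDA copy still has to drain. A sketch with an invented output name, datatype and shape:

```cpp
#include <vector>

#include "triton/backend/backend_output_responder.h"

void
ScatterOutput(
    TRITONBACKEND_Request** requests, uint32_t request_count,
    std::vector<TRITONBACKEND_Response*>* responses,
    TRITONBACKEND_MemoryManager* memory_manager, const char* output_buffer,
    std::vector<int64_t>& batchn_shape, cudaStream_t stream)
{
  triton::backend::BackendOutputResponder responder(
      requests, request_count, responses, memory_manager,
      true /* first_dim_batching */, false /* pinned_enabled */, stream);

  // 'batchn_shape' covers the full batch and may be adjusted per response.
  responder.ProcessTensor(
      "OUTPUT0", TRITONSERVER_TYPE_FP32, batchn_shape, output_buffer,
      TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */);

  if (responder.Finalize()) {
#ifdef TRITON_ENABLE_GPU
    // A cudaMemcpyAsync was issued on 'stream'; wait before the responses
    // referencing the copied data are sent.
    cudaStreamSynchronize(stream);
#endif  // TRITON_ENABLE_GPU
  }
}
```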
- void ProcessBatchOutput( - const std::string& name, const BatchOutput& batch_output, - const char* buffer, const TRITONSERVER_MemoryType memory_type, - const int64_t memory_type_id); - - // Finalize processing of all responses for all output - // tensors. Return true if cudaMemcpyAsync is called, and the caller - // should call cudaStreamSynchronize (or cudaEventSynchronize on 'event') - // before using the data. - bool Finalize(); - - private: - bool FlushPendingPinned( - const char* tensor_buffer, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id); - bool SetFixedSizeBuffer( - TRITONBACKEND_Response** response, void* response_state_or_output, - const std::string& output_name, const size_t tensor_byte_size, - const size_t tensor_offset, const char* tensor_buffer, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id, - const TRITONSERVER_MemoryType use_pinned_memory_type, bool state); - - struct OutputData { - OutputData( - const std::string& name, void* buffer, const size_t buffer_byte_size, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id) - : name_(name), buffer_(buffer), buffer_byte_size_(buffer_byte_size), - memory_type_(memory_type), memory_type_id_(memory_type_id) - { - } - const std::string name_; - void* buffer_; - const size_t buffer_byte_size_; - const TRITONSERVER_MemoryType memory_type_; - const int64_t memory_type_id_; - }; - - bool need_sync_; - TRITONBACKEND_Request** requests_; - const uint32_t request_count_; - std::vector* responses_; - TRITONBACKEND_MemoryManager* memory_manager_; - const bool first_dim_batching_; - const bool pinned_enabled_; - const bool use_async_cpu_copy_; - cudaStream_t stream_; - cudaEvent_t event_; - - using ResponsesList = - std::list>; - - size_t pending_pinned_byte_size_; - size_t pending_pinned_offset_; - ResponsesList pending_pinned_outputs_; - const bool copy_on_stream_; - - // Pinned memories that need to live over the lifetime of this - // BackendOutputResponder object. - std::list pinned_memories_; - - // Pinned memory buffers and the corresponding response outputs - // where the final copy to the response is deferred until Finalize() - // after waiting for all in-flight copies. - struct DeferredPinned { - DeferredPinned( - char* pinned_memory, const size_t pinned_memory_size, - ResponsesList&& responses) - : pinned_memory_(pinned_memory), - pinned_memory_size_(pinned_memory_size), - responses_(std::move(responses)) - { - } - char* pinned_memory_; - const size_t pinned_memory_size_; - ResponsesList responses_; - }; - - std::list deferred_pinned_; -}; - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/src/backend_common.cc b/3rdparty/backend-r22.12/src/backend_common.cc deleted file mode 100644 index 4f7a660b3d55a95444734857ed500ea1ebb1dbfc..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/backend_common.cc +++ /dev/null @@ -1,1374 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/backend/backend_common.h" - -#ifdef _WIN32 -// suppress the min and max definitions in Windef.h. -#define NOMINMAX -#include - -// _CRT_INTERNAL_NONSTDC_NAMES 1 before including Microsoft provided C Runtime -// library to expose declarations without "_" prefix to match POSIX style. -#define _CRT_INTERNAL_NONSTDC_NAMES 1 -#include -#include -#else -#include -#include -#endif -#include -#include -#include -#include -#include -#include - -#ifdef _WIN32 -// in Windows doesn't define S_ISDIR macro -#if !defined(S_ISDIR) && defined(S_IFMT) && defined(S_IFDIR) -#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) -#endif -#define F_OK 0 -#endif - -namespace triton { namespace backend { - -#ifdef TRITON_ENABLE_GPU -void CUDART_CB -MemcpyHost(void* args) -{ - auto* copy_params = reinterpret_cast(args); - memcpy(copy_params->dst_, copy_params->src_, copy_params->byte_size_); - delete copy_params; -} -#endif // TRITON_ENABLE_GPU - -TRITONSERVER_MemoryType -GetUsePinnedMemoryType(TRITONSERVER_MemoryType ref_buffer_type) -{ - // The following matrix is used for both input and output. - // src \ dest | non-pinned | pinned | device - // non-pinned | memcpy | memcpy | buffer needed - // pinned | memcpy | memcpy | cudaMemcpy - // device | buffer needed | cudaMemcpy | cudaMemcpy - if (ref_buffer_type == TRITONSERVER_MEMORY_CPU_PINNED) { - return TRITONSERVER_MEMORY_CPU_PINNED; - } - - return (ref_buffer_type == TRITONSERVER_MEMORY_CPU) ? 
TRITONSERVER_MEMORY_GPU - : TRITONSERVER_MEMORY_CPU; -} - -TRITONSERVER_Error_Code -StatusCodeToTritonCode(triton::common::Error::Code error_code) -{ - switch (error_code) { - case triton::common::Error::Code::UNKNOWN: - return TRITONSERVER_ERROR_UNKNOWN; - case triton::common::Error::Code::INTERNAL: - return TRITONSERVER_ERROR_INTERNAL; - case triton::common::Error::Code::NOT_FOUND: - return TRITONSERVER_ERROR_NOT_FOUND; - case triton::common::Error::Code::INVALID_ARG: - return TRITONSERVER_ERROR_INVALID_ARG; - case triton::common::Error::Code::UNAVAILABLE: - return TRITONSERVER_ERROR_UNAVAILABLE; - case triton::common::Error::Code::UNSUPPORTED: - return TRITONSERVER_ERROR_UNSUPPORTED; - case triton::common::Error::Code::ALREADY_EXISTS: - return TRITONSERVER_ERROR_ALREADY_EXISTS; - - default: - break; - } - - return TRITONSERVER_ERROR_UNKNOWN; -} - -TRITONSERVER_Error* -CommonErrorToTritonError(triton::common::Error error) -{ - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(error.ErrorCode()), error.Message().c_str()); -} - -TRITONSERVER_Error* -ParseShape( - common::TritonJson::Value& io, const std::string& name, - std::vector* shape) -{ - common::TritonJson::Value shape_array; - RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array)); - for (size_t i = 0; i < shape_array.ArraySize(); ++i) { - int64_t d = 0; - RETURN_IF_ERROR(shape_array.IndexAsInt(i, &d)); - shape->push_back(d); - } - - return nullptr; // success -} - -std::string -ShapeToString(const int64_t* dims, const size_t dims_count) -{ - bool first = true; - - std::string str("["); - for (size_t i = 0; i < dims_count; ++i) { - const int64_t dim = dims[i]; - if (!first) { - str += ","; - } - str += std::to_string(dim); - first = false; - } - - str += "]"; - return str; -} - -std::string -ShapeToString(const std::vector& shape) -{ - return ShapeToString(shape.data(), shape.size()); -} - -int64_t -GetElementCount(const int64_t* dims, const size_t dims_count) -{ - bool first = true; - int64_t cnt = 0; - for (size_t i = 0; i < dims_count; i++) { - if (dims[i] == WILDCARD_DIM) { - return -1; - } - - if (first) { - cnt = dims[i]; - first = false; - } else { - cnt *= dims[i]; - } - } - - return cnt; -} - -int64_t -GetElementCount(const std::vector& shape) -{ - return GetElementCount(shape.data(), shape.size()); -} - -int64_t -GetByteSize( - const TRITONSERVER_DataType& dtype, const std::vector& dims) -{ - size_t dt_size = TRITONSERVER_DataTypeByteSize(dtype); - if (dt_size == 0) { - return -1; - } - - int64_t cnt = GetElementCount(dims); - if (cnt == -1) { - return -1; - } - - return cnt * dt_size; -} - -TRITONSERVER_Error* -ReadInputTensor( - TRITONBACKEND_Request* request, const std::string& input_name, char* buffer, - size_t* buffer_byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, cudaStream_t cuda_stream, bool* cuda_used, - const char* host_policy_name, const bool copy_on_stream) -{ - TRITONBACKEND_Input* input; - RETURN_IF_ERROR( - TRITONBACKEND_RequestInput(request, input_name.c_str(), &input)); - - uint64_t input_byte_size; - uint32_t input_buffer_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_name, nullptr, nullptr, nullptr, nullptr, - &input_byte_size, &input_buffer_count)); - RETURN_ERROR_IF_FALSE( - input_byte_size <= *buffer_byte_size, TRITONSERVER_ERROR_INVALID_ARG, - std::string( - GetRequestId(request) + "buffer too small for input tensor '" + - input_name + "', " + std::to_string(*buffer_byte_size) + " < " + - 
std::to_string(input_byte_size))); - - size_t output_buffer_offset = 0; - for (uint32_t b = 0; b < input_buffer_count; ++b) { - const void* input_buffer = nullptr; - uint64_t input_buffer_byte_size = 0; - TRITONSERVER_MemoryType input_memory_type = TRITONSERVER_MEMORY_CPU; - int64_t input_memory_type_id = 0; - - RETURN_IF_ERROR(TRITONBACKEND_InputBufferForHostPolicy( - input, host_policy_name, b, &input_buffer, &input_buffer_byte_size, - &input_memory_type, &input_memory_type_id)); - - RETURN_IF_ERROR(CopyBuffer( - "Failed to copy buffer", input_memory_type, input_memory_type_id, - memory_type, memory_type_id, input_buffer_byte_size, input_buffer, - buffer + output_buffer_offset, cuda_stream, cuda_used, copy_on_stream)); - - output_buffer_offset += input_buffer_byte_size; - } - - *buffer_byte_size = input_byte_size; - - return nullptr; // success -} - -TRITONSERVER_Error* -ReadInputTensor( - TRITONBACKEND_Request* request, const std::string& input_name, char* buffer, - size_t* buffer_byte_size, const char* host_policy_name) -{ - bool cuda_used; - return ReadInputTensor( - request, input_name, buffer, buffer_byte_size, - TRITONSERVER_MEMORY_CPU /* memory_type */, 0 /* memory_type_id */, - 0 /* cuda_stream */, &cuda_used); -} - -TRITONSERVER_Error* -CheckAllowedModelInput( - common::TritonJson::Value& io, const std::set& allowed) -{ - std::string io_name; - RETURN_IF_ERROR(io.MemberAsString("name", &io_name)); - if (allowed.find(io_name) == allowed.end()) { - std::string astr; - for (const auto& a : allowed) { - if (!astr.empty()) { - astr.append(", "); - } - astr.append(a); - } - - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "unexpected inference input '" + io_name + - "', allowed inputs are: " + astr) - .c_str()); - } - return nullptr; // success -} - -TRITONSERVER_Error* -CheckAllowedModelOutput( - common::TritonJson::Value& io, const std::set& allowed) -{ - std::string io_name; - RETURN_IF_ERROR(io.MemberAsString("name", &io_name)); - if (allowed.find(io_name) == allowed.end()) { - std::string astr; - for (const auto& a : allowed) { - if (!astr.empty()) { - astr.append(", "); - } - astr.append(a); - } - - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "unexpected inference output '" + io_name + - "', allowed outputs are: " + astr) - .c_str()); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -GetBooleanSequenceControlProperties( - common::TritonJson::Value& batcher, const std::string& model_name, - const std::string& control_kind, const bool required, - std::string* tensor_name, std::string* tensor_datatype, - float* fp32_false_value, float* fp32_true_value, int32_t* int32_false_value, - int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value) -{ - // Make sure same tensor is not configured for multiple controls - std::set seen_tensors; - - // Make sure the control kind is not mentioned multiple times. 
- bool seen_control = false; - - common::TritonJson::Value control_inputs; - if (batcher.Find("control_input", &control_inputs)) { - for (size_t ci_idx = 0; ci_idx < control_inputs.ArraySize(); ci_idx++) { - common::TritonJson::Value control_input; - RETURN_IF_ERROR(control_inputs.IndexAsObject(ci_idx, &control_input)); - std::string input_name; - RETURN_IF_ERROR(control_input.MemberAsString("name", &input_name)); - if (input_name.empty()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control tensor must have a name for ") + - model_name) - .c_str()); - } - - if (seen_tensors.find(input_name) != seen_tensors.end()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("sequence batching control tensor '") + input_name + - "' is specified for multiple control kinds for " + model_name) - .c_str()); - } - - seen_tensors.insert(input_name); - common::TritonJson::Value controls; - if (control_input.Find("control", &controls)) { - for (size_t c_idx = 0; c_idx < controls.ArraySize(); c_idx++) { - common::TritonJson::Value c; - RETURN_IF_ERROR(controls.IndexAsObject(c_idx, &c)); - std::string kind_str; - RETURN_IF_ERROR(c.MemberAsString("kind", &kind_str)); - if (kind_str == control_kind) { - if (seen_control) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching specifies multiple " + control_kind + - " tensors for " + model_name) - .c_str())); - } - - *tensor_name = input_name; - seen_control = true; - - common::TritonJson::Value int32_false_true, fp32_false_true, - bool_false_true; - bool found_int32 = - (c.Find("int32_false_true", &int32_false_true) && - (int32_false_true.ArraySize() > 0)); - bool found_fp32 = - (c.Find("fp32_false_true", &fp32_false_true) && - (fp32_false_true.ArraySize() > 0)); - bool found_bool = - (c.Find("bool_false_true", &bool_false_true) && - (bool_false_true.ArraySize() > 0)); - - // Make sure only one of int, float, or bool type is specified. 
- if (!(found_int32 || found_fp32 || found_bool)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching must specify either " - "'int32_false_true', 'fp32_false_true' or " - "'bool_false_true' for " + - control_kind + " for " + model_name)) - .c_str()); - } else if ( - (found_fp32 && found_int32) || (found_fp32 && found_bool) || - (found_int32 && found_bool)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching specifies more than one from " - "'int32_false_true', 'fp32_false_true' and " - "'bool_false_true' for " + - control_kind + " for " + model_name)) - .c_str()); - } - - if (found_int32) { - if (int32_false_true.ArraySize() != 2) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control 'int32_false_true' must " - "have " - "exactly 2 entries for " + - control_kind + " for " + model_name)) - .c_str()); - } - if (tensor_datatype != nullptr) { - *tensor_datatype = "TYPE_INT32"; - } - if (int32_false_value != nullptr) { - int64_t value; - RETURN_IF_ERROR(int32_false_true.IndexAsInt(0, &value)); - *int32_false_value = value; - } - if (int32_true_value != nullptr) { - int64_t value; - RETURN_IF_ERROR(int32_false_true.IndexAsInt(1, &value)); - *int32_true_value = value; - } - } else if (found_fp32) { - if (fp32_false_true.ArraySize() != 2) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control 'fp32_false_true' must " - "have exactly " - "2 entries for " + - control_kind + " for " + model_name)) - .c_str()); - } - if (tensor_datatype != nullptr) { - *tensor_datatype = "TYPE_FP32"; - } - if (fp32_false_value != nullptr) { - double value = 0.0; - RETURN_IF_ERROR(fp32_false_true.IndexAsDouble(0, &value)); - *fp32_false_value = value; - } - if (fp32_true_value != nullptr) { - double value = 0.0; - RETURN_IF_ERROR(fp32_false_true.IndexAsDouble(1, &value)); - *fp32_true_value = value; - } - } else { - if (bool_false_true.ArraySize() != 2) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control 'bool_false_true' must " - "have exactly " - "2 entries for " + - control_kind + " for " + model_name)) - .c_str()); - } - if (tensor_datatype != nullptr) { - *tensor_datatype = "TYPE_BOOL"; - } - if (bool_false_value != nullptr) { - bool value; - RETURN_IF_ERROR(bool_false_true.IndexAsBool(0, &value)); - *bool_false_value = value; - } - if (bool_true_value != nullptr) { - bool value; - RETURN_IF_ERROR(bool_false_true.IndexAsBool(1, &value)); - *bool_true_value = value; - } - } - } - } - } - } - } - - if (!seen_control) { - if (required) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control tensor must specify a " + - control_kind + " value for " + model_name)) - .c_str()); - } - - tensor_name->clear(); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -GetTypedSequenceControlProperties( - common::TritonJson::Value& batcher, const std::string& model_name, - const std::string& control_kind, const bool required, - std::string* tensor_name, std::string* tensor_datatype) -{ - // Make sure same tensor is not configured for multiple controls - std::set seen_tensors; - - // Make sure the control kind is not mentioned multiple times. 
- bool seen_control = false; - - common::TritonJson::Value control_inputs; - if (batcher.Find("control_input", &control_inputs)) { - for (size_t ci_idx = 0; ci_idx < control_inputs.ArraySize(); ci_idx++) { - common::TritonJson::Value control_input; - RETURN_IF_ERROR(control_inputs.IndexAsObject(ci_idx, &control_input)); - std::string input_name; - RETURN_IF_ERROR(control_input.MemberAsString("name", &input_name)); - if (input_name.empty()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control tensor must have a name for ") + - model_name) - .c_str()); - } - if (seen_tensors.find(input_name) != seen_tensors.end()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("sequence batching control tensor '") + input_name + - "' is specified for multiple control kinds for " + model_name) - .c_str()); - } - - seen_tensors.insert(input_name); - common::TritonJson::Value controls; - if (control_input.Find("control", &controls)) { - for (size_t c_idx = 0; c_idx < controls.ArraySize(); c_idx++) { - common::TritonJson::Value c; - RETURN_IF_ERROR(controls.IndexAsObject(c_idx, &c)); - std::string kind_str; - RETURN_IF_ERROR(c.MemberAsString("kind", &kind_str)); - if (kind_str == control_kind) { - if (seen_control) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching specifies multiple " + control_kind + - " tensors for " + model_name) - .c_str())); - } - - *tensor_name = input_name; - if (tensor_datatype != nullptr) { - RETURN_IF_ERROR(c.MemberAsString("data_type", tensor_datatype)); - } - - seen_control = true; - - common::TritonJson::Value int32_false_true, fp32_false_true, - bool_false_true; - bool found_int32 = - (c.Find("int32_false_true", &int32_false_true) && - (int32_false_true.ArraySize() > 0)); - bool found_fp32 = - (c.Find("fp32_false_true", &fp32_false_true) && - (fp32_false_true.ArraySize() > 0)); - bool found_bool = - (c.Find("bool_false_true", &bool_false_true) && - (bool_false_true.ArraySize() > 0)); - if (found_fp32 || found_int32 || found_bool) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching must not specify either " - "'int32_false_true', 'fp32_false_true' or " - "'bool_false_true' for " + - control_kind + " for " + model_name)) - .c_str()); - } - } - } - } - } - } - - if (!seen_control) { - if (required) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string( - "sequence batching control tensor must specify a " + - control_kind + " value for " + model_name)) - .c_str()); - } - - tensor_name->clear(); - } - - return nullptr; // success -} - -void -RequestsRespondWithError( - TRITONBACKEND_Request** requests, const uint32_t request_count, - TRITONSERVER_Error* response_err, const bool release_request) -{ - for (size_t i = 0; i < request_count; i++) { - TRITONBACKEND_Response* response; - auto err = TRITONBACKEND_ResponseNew(&response, requests[i]); - if (err != nullptr) { - LOG_MESSAGE( - TRITONSERVER_LOG_ERROR, - (GetRequestId(requests[i]) + "fail to create response").c_str()); - TRITONSERVER_ErrorDelete(err); - } else { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, response_err), - (GetRequestId(requests[i]) + "fail to send error response").c_str()); - } - - if (release_request) { - LOG_IF_ERROR( - TRITONBACKEND_RequestRelease( - requests[i], TRITONSERVER_REQUEST_RELEASE_ALL), - "fail to release request"); - 
requests[i] = nullptr; - } - } - - TRITONSERVER_ErrorDelete(response_err); -} - -void -SendErrorForResponses( - std::vector* responses, - const uint32_t response_count, TRITONSERVER_Error* response_err) -{ - for (size_t i = 0; i < response_count; i++) { - TRITONBACKEND_Response* response = (*responses)[i]; - if (response != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, response_err), - "fail to send error response"); - (*responses)[i] = nullptr; - } - } - - TRITONSERVER_ErrorDelete(response_err); -} - -TRITONSERVER_Error* -CopyBuffer( - const std::string& msg, const TRITONSERVER_MemoryType src_memory_type, - const int64_t src_memory_type_id, - const TRITONSERVER_MemoryType dst_memory_type, - const int64_t dst_memory_type_id, const size_t byte_size, const void* src, - void* dst, cudaStream_t cuda_stream, bool* cuda_used, - const bool copy_on_stream) -{ - *cuda_used = false; - - if (byte_size > 0) { - if (src == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - msg + ": attempted a copy of " + std::to_string(byte_size) + - " Bytes from an uninitialized memory") - .c_str()); - } - - if (dst == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - msg + ": attempted a copy of " + std::to_string(byte_size) + - " Bytes to an uninitialized memory") - .c_str()); - } - } - - - // For CUDA memcpy, if copy_on_stream is false, all host to host copy will be - // blocked in respect to the host, so use memcpy() directly. In this case, - // need to be careful on whether the src buffer is valid. - if ((src_memory_type != TRITONSERVER_MEMORY_GPU) && - (dst_memory_type != TRITONSERVER_MEMORY_GPU)) { -#ifdef TRITON_ENABLE_GPU - if (copy_on_stream) { - auto params = new CopyParams(dst, src, byte_size); - cudaLaunchHostFunc( - cuda_stream, MemcpyHost, reinterpret_cast(params)); - *cuda_used = true; - } else { - memcpy(dst, src, byte_size); - } -#else - memcpy(dst, src, byte_size); -#endif // TRITON_ENABLE_GPU - } else { -#ifdef TRITON_ENABLE_GPU - // [TODO] use cudaMemcpyDefault if UVM is supported for the device - auto copy_kind = cudaMemcpyDeviceToDevice; - if (src_memory_type != TRITONSERVER_MEMORY_GPU) { - copy_kind = cudaMemcpyHostToDevice; - } else if (dst_memory_type != TRITONSERVER_MEMORY_GPU) { - copy_kind = cudaMemcpyDeviceToHost; - } - - if ((src_memory_type_id != dst_memory_type_id) && - (copy_kind == cudaMemcpyDeviceToDevice)) { - RETURN_IF_CUDA_ERROR( - cudaMemcpyPeerAsync( - dst, dst_memory_type_id, src, src_memory_type_id, byte_size, - cuda_stream), - TRITONSERVER_ERROR_INTERNAL, msg + ": failed to perform CUDA copy"); - } else { - RETURN_IF_CUDA_ERROR( - cudaMemcpyAsync(dst, src, byte_size, copy_kind, cuda_stream), - TRITONSERVER_ERROR_INTERNAL, msg + ": failed to perform CUDA copy"); - } - - *cuda_used = true; -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string(msg + ": try to use CUDA copy while GPU is not supported") - .c_str()); -#endif // TRITON_ENABLE_GPU - } - - return nullptr; // success -} - -TRITONSERVER_Error* -GetDirectoryContents(const std::string& path, std::set* contents) -{ -#ifdef _WIN32 - WIN32_FIND_DATA entry; - HANDLE dir = FindFirstFile(path.c_str(), &entry); - if (dir == INVALID_HANDLE_VALUE) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("failed to open directory: ") + path).c_str()); - } - if ((entry.cFileName != ".") && (entry.cFileName != "..")) { - 
contents->insert(entry.cFileName); - } - while (FindNextFileA(dir, &entry)) { - if ((entry.cFileName != ".") && (entry.cFileName != "..")) { - contents->insert(entry.cFileName); - } - } - - FindClose(dir); -#else - DIR* dir = opendir(path.c_str()); - if (dir == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("failed to open directory: ") + path).c_str()); - } - - struct dirent* entry; - while ((entry = readdir(dir)) != nullptr) { - std::string entryname = entry->d_name; - if ((entryname != ".") && (entryname != "..")) { - contents->insert(entryname); - } - } - - closedir(dir); -#endif - return nullptr; // success -} - -TRITONSERVER_Error* -FileExists(const std::string& path, bool* exists) -{ - *exists = (access(path.c_str(), F_OK) == 0); - return nullptr; // success -} - -TRITONSERVER_Error* -ReadTextFile(const std::string& path, std::string* contents) -{ - std::ifstream in(path, std::ios::in | std::ios::binary); - if (!in) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - ("failed to open/read file '" + path + "': " + strerror(errno)) - .c_str()); - } - - in.seekg(0, std::ios::end); - contents->resize(in.tellg()); - in.seekg(0, std::ios::beg); - in.read(&(*contents)[0], contents->size()); - in.close(); - - return nullptr; // success -} - -TRITONSERVER_Error* -IsDirectory(const std::string& path, bool* is_dir) -{ - *is_dir = false; - - struct stat st; - if (stat(path.c_str(), &st) != 0) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("failed to stat file ") + path).c_str()); - } - - *is_dir = S_ISDIR(st.st_mode); - return nullptr; // success -} - -std::string -JoinPath(std::initializer_list segments) -{ - std::string joined; - - for (const auto& seg : segments) { - if (joined.empty()) { - joined = seg; - } else if (!seg.empty() && (seg[0] == '/')) { // IsAbsolutePath(seg) - if (joined[joined.size() - 1] == '/') { - joined.append(seg.substr(1)); - } else { - joined.append(seg); - } - } else { // !IsAbsolutePath(seg) - if (joined[joined.size() - 1] != '/') { - joined.append("/"); - } - joined.append(seg); - } - } - - return joined; -} - -TRITONSERVER_Error* -ModelPaths( - const std::string& model_repository_path, uint64_t version, - const bool ignore_directories, const bool ignore_files, - std::unordered_map* model_paths) -{ - std::set model_files; - // Read all the files in 'path' and filter by type for different requirements - auto path = JoinPath({model_repository_path, std::to_string(version)}); - RETURN_IF_ERROR(GetDirectoryContents(path, &model_files)); - if (ignore_directories) { - // Erase directory entries... - for (auto iter = model_files.begin(); iter != model_files.end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir)); - if (is_dir) { - iter = model_files.erase(iter); - } else { - ++iter; - } - } - } - if (ignore_files) { - // Erase non-directory entries... 
- for (auto iter = model_files.begin(); iter != model_files.end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir)); - if (!is_dir) { - iter = model_files.erase(iter); - } else { - ++iter; - } - } - } - - for (const auto& filename : model_files) { - const auto model_path = JoinPath({path, filename}); - model_paths->emplace( - std::piecewise_construct, std::make_tuple(filename), - std::make_tuple(model_path)); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -CreateCudaStream( - const int device_id, const int cuda_stream_priority, cudaStream_t* stream) -{ - *stream = nullptr; - -#ifdef TRITON_ENABLE_GPU - // Make sure that correct device is set before creating stream and - // then restore the device to what was set by the caller. - int current_device; - auto cuerr = cudaGetDevice(¤t_device); - bool overridden = false; - if (cuerr == cudaSuccess) { - overridden = (current_device != device_id); - if (overridden) { - cuerr = cudaSetDevice(device_id); - } - } - - if (cuerr == cudaSuccess) { - cuerr = cudaStreamCreateWithPriority( - stream, cudaStreamDefault, cuda_stream_priority); - } - - if (overridden) { - cudaSetDevice(current_device); - } - - if (cuerr != cudaSuccess) { - *stream = nullptr; - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("unable to create stream: ") + cudaGetErrorString(cuerr)) - .c_str()); - } -#endif // TRITON_ENABLE_GPU - - return nullptr; // success -} - -TRITONSERVER_Error* -ParseLongLongValue(const std::string& value, int64_t* parsed_value) -{ - try { - *parsed_value = std::stoll(value); - } - catch (const std::invalid_argument& ia) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("failed to convert '") + value + - "' to long long integral number") - .c_str()); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -ParseUnsignedLongLongValue(const std::string& value, uint64_t* parsed_value) -{ - try { - *parsed_value = std::stoull(value); - } - catch (const std::invalid_argument& ia) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("failed to convert '") + value + - "' to unsigned long long integral number") - .c_str()); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -ParseBoolValue(const std::string& value, bool* parsed_value) -{ - std::string lvalue = value; - std::transform( - lvalue.begin(), lvalue.end(), lvalue.begin(), - [](unsigned char c) { return std::tolower(c); }); - - if ((lvalue == "true") || (lvalue == "on") || (lvalue == "1")) { - *parsed_value = true; - return nullptr; // success - } - if ((lvalue == "false") || (lvalue == "off") || (lvalue == "0")) { - *parsed_value = false; - return nullptr; // success - } - - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("failed to convert '") + value + "' to boolean").c_str()); -} - -TRITONSERVER_Error* -ParseIntValue(const std::string& value, int* parsed_value) -{ - try { - *parsed_value = std::stoi(value); - } - catch (const std::invalid_argument& ia) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("failed to convert '") + value + "' to integral number") - .c_str()); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -ParseDoubleValue(const std::string& value, double* parsed_value) -{ - try { - *parsed_value = std::stod(value); - } - catch (const std::invalid_argument& ia) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("failed to convert 
'") + value + "' to double number") - .c_str()); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -GetParameterValue( - triton::common::TritonJson::Value& params, const std::string& key, - std::string* value) -{ - triton::common::TritonJson::Value json_value; - RETURN_ERROR_IF_FALSE( - params.Find(key.c_str(), &json_value), TRITONSERVER_ERROR_NOT_FOUND, - std::string("model configuration is missing the parameter ") + key); - RETURN_IF_ERROR(json_value.MemberAsString("string_value", value)); - return nullptr; // success -} - -TRITONSERVER_Error* -BatchInput::ParseFromModelConfig( - triton::common::TritonJson::Value& config, - std::vector* batch_inputs) -{ - batch_inputs->clear(); - triton::common::TritonJson::Value bis; - RETURN_IF_ERROR(config.MemberAsArray("batch_input", &bis)); - for (size_t i = 0; i < bis.ArraySize(); ++i) { - triton::common::TritonJson::Value bi; - RETURN_IF_ERROR(bis.IndexAsObject(i, &bi)); - batch_inputs->emplace_back(); - RETURN_IF_ERROR(batch_inputs->back().Init(bi)); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -BatchInput::Init(triton::common::TritonJson::Value& bi_config) -{ - { - triton::common::TritonJson::Value bi_target_names; - RETURN_IF_ERROR(bi_config.MemberAsArray("target_name", &bi_target_names)); - for (size_t i = 0; i < bi_target_names.ArraySize(); ++i) { - std::string tn; - RETURN_IF_ERROR(bi_target_names.IndexAsString(i, &tn)); - target_names_.emplace_back(std::move(tn)); - } - } - { - RETURN_IF_ERROR(bi_config.MemberAsString("kind", &kind_str_)); - if (kind_str_ == "BATCH_ELEMENT_COUNT") { - kind_ = Kind::BATCH_ELEMENT_COUNT; - } else if (kind_str_ == "BATCH_ACCUMULATED_ELEMENT_COUNT") { - kind_ = Kind::BATCH_ACCUMULATED_ELEMENT_COUNT; - } else if (kind_str_ == "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO") { - kind_ = Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO; - } else if (kind_str_ == "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE") { - kind_ = Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE; - } else if (kind_str_ == "BATCH_ITEM_SHAPE") { - kind_ = Kind::BATCH_ITEM_SHAPE; - } else if (kind_str_ == "BATCH_ITEM_SHAPE_FLATTEN") { - kind_ = Kind::BATCH_ITEM_SHAPE_FLATTEN; - } else { - RETURN_ERROR_IF_FALSE( - false, TRITONSERVER_ERROR_INVALID_ARG, - std::string("unexpected batch input kind '" + kind_str_ + "'")); - } - } - { - std::string bi_dtype; - RETURN_IF_ERROR(bi_config.MemberAsString("data_type", &bi_dtype)); - data_type_ = ModelConfigDataTypeToTritonServerDataType(bi_dtype); - RETURN_ERROR_IF_TRUE( - data_type_ == TRITONSERVER_TYPE_INVALID, TRITONSERVER_ERROR_INVALID_ARG, - std::string("unexpected batch input data type '" + bi_dtype + "'")); - } - { - triton::common::TritonJson::Value bi_source_inputs; - RETURN_IF_ERROR(bi_config.MemberAsArray("source_input", &bi_source_inputs)); - for (size_t i = 0; i < bi_source_inputs.ArraySize(); ++i) { - std::string si; - RETURN_IF_ERROR(bi_source_inputs.IndexAsString(i, &si)); - source_inputs_.emplace_back(std::move(si)); - } - } - return nullptr; // success -} - -TRITONSERVER_DataType -ModelConfigDataTypeToTritonServerDataType(const std::string& data_type_str) -{ - // Must start with "TYPE_". 
- if (data_type_str.rfind("TYPE_", 0) != 0) { - return TRITONSERVER_TYPE_INVALID; - } - - const std::string dtype = data_type_str.substr(strlen("TYPE_")); - - if (dtype == "BOOL") { - return TRITONSERVER_TYPE_BOOL; - } else if (dtype == "UINT8") { - return TRITONSERVER_TYPE_UINT8; - } else if (dtype == "UINT16") { - return TRITONSERVER_TYPE_UINT16; - } else if (dtype == "UINT32") { - return TRITONSERVER_TYPE_UINT32; - } else if (dtype == "UINT64") { - return TRITONSERVER_TYPE_UINT64; - } else if (dtype == "INT8") { - return TRITONSERVER_TYPE_INT8; - } else if (dtype == "INT16") { - return TRITONSERVER_TYPE_INT16; - } else if (dtype == "INT32") { - return TRITONSERVER_TYPE_INT32; - } else if (dtype == "INT64") { - return TRITONSERVER_TYPE_INT64; - } else if (dtype == "FP16") { - return TRITONSERVER_TYPE_FP16; - } else if (dtype == "FP32") { - return TRITONSERVER_TYPE_FP32; - } else if (dtype == "FP64") { - return TRITONSERVER_TYPE_FP64; - } else if (dtype == "STRING") { - return TRITONSERVER_TYPE_BYTES; - } else if (dtype == "BF16") { - return TRITONSERVER_TYPE_BF16; - } - - return TRITONSERVER_TYPE_INVALID; -} - -TRITONSERVER_Error* -BatchOutput::ParseFromModelConfig( - triton::common::TritonJson::Value& config, - std::vector* batch_outputs) -{ - batch_outputs->clear(); - triton::common::TritonJson::Value bos; - RETURN_IF_ERROR(config.MemberAsArray("batch_output", &bos)); - for (size_t i = 0; i < bos.ArraySize(); ++i) { - batch_outputs->emplace_back(); - auto& batch_output = batch_outputs->back(); - triton::common::TritonJson::Value bo; - RETURN_IF_ERROR(bos.IndexAsObject(i, &bo)); - { - triton::common::TritonJson::Value bo_target_names; - RETURN_IF_ERROR(bo.MemberAsArray("target_name", &bo_target_names)); - for (size_t i = 0; i < bo_target_names.ArraySize(); ++i) { - std::string tn; - RETURN_IF_ERROR(bo_target_names.IndexAsString(i, &tn)); - batch_output.target_names_.emplace_back(std::move(tn)); - } - } - { - std::string bo_kind; - RETURN_IF_ERROR(bo.MemberAsString("kind", &bo_kind)); - if (bo_kind == "BATCH_SCATTER_WITH_INPUT_SHAPE") { - batch_output.kind_ = Kind::BATCH_SCATTER_WITH_INPUT_SHAPE; - // Keep track of the output info for later cross reference with input - int64_t mbs = 0; - RETURN_IF_ERROR(config.MemberAsInt("max_batch_size", &mbs)); - if (mbs != 0) { - batch_output.shape_.push_back(-1); - } - triton::common::TritonJson::Value ios; - RETURN_IF_ERROR(config.MemberAsArray("output", &ios)); - for (size_t i = 0; i < ios.ArraySize(); i++) { - triton::common::TritonJson::Value io; - RETURN_IF_ERROR(ios.IndexAsObject(i, &io)); - std::string io_name; - RETURN_IF_ERROR(io.MemberAsString("name", &io_name)); - if (io_name == batch_output.target_names_[0]) { - std::string io_dtype; - RETURN_IF_ERROR(io.MemberAsString("data_type", &io_dtype)); - batch_output.data_type_ = - ModelConfigDataTypeToTritonServerDataType(io_dtype); - // If a reshape is provided for the input then use that when - // validating that the model matches what is expected. 
- triton::common::TritonJson::Value reshape; - if (io.Find("reshape", &reshape)) { - RETURN_IF_ERROR( - ParseShape(reshape, "shape", &batch_output.shape_)); - } else { - RETURN_IF_ERROR(ParseShape(io, "dims", &batch_output.shape_)); - } - break; - } - } - } else { - RETURN_ERROR_IF_FALSE( - false, TRITONSERVER_ERROR_INVALID_ARG, - std::string("unexpected batch output kind '" + bo_kind + "'")); - } - } - { - triton::common::TritonJson::Value bo_source_inputs; - RETURN_IF_ERROR(bo.MemberAsArray("source_input", &bo_source_inputs)); - for (size_t i = 0; i < bo_source_inputs.ArraySize(); ++i) { - std::string si; - RETURN_IF_ERROR(bo_source_inputs.IndexAsString(i, &si)); - batch_output.source_inputs_.emplace_back(std::move(si)); - } - } - } - - return nullptr; // success -} - -TRITONSERVER_Error* -TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - std::string* value, const std::string& default_value) -{ - triton::common::TritonJson::Value json_value; - if (params.Find(mkey.c_str(), &json_value)) { - RETURN_IF_ERROR(json_value.MemberAsString("string_value", value)); - } else { - *value = default_value; - } - - return nullptr; // success -} - -TRITONSERVER_Error* -TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - int* value, const int& default_value) -{ - triton::common::TritonJson::Value json_value; - if (params.Find(mkey.c_str(), &json_value)) { - std::string string_value; - RETURN_IF_ERROR(json_value.MemberAsString("string_value", &string_value)); - return ParseIntValue(string_value, value); - } else { - *value = default_value; - return nullptr; // success - } -} - -TRITONSERVER_Error* -TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - bool* value, const bool& default_value) -{ - triton::common::TritonJson::Value json_value; - if (params.Find(mkey.c_str(), &json_value)) { - std::string string_value; - RETURN_IF_ERROR(json_value.MemberAsString("string_value", &string_value)); - return ParseBoolValue(string_value, value); - } else { - *value = default_value; - return nullptr; // success - } -} - -TRITONSERVER_Error* -TryParseModelStringParameter( - triton::common::TritonJson::Value& params, const std::string& mkey, - uint64_t* value, const uint64_t& default_value) -{ - triton::common::TritonJson::Value json_value; - if (params.Find(mkey.c_str(), &json_value)) { - std::string string_value; - RETURN_IF_ERROR(json_value.MemberAsString("string_value", &string_value)); - return ParseUnsignedLongLongValue(string_value, value); - } else { - *value = default_value; - return nullptr; // success - } -} - -namespace { - -template -TRITONSERVER_Error* -BufferAsTypedString( - std::string& str, const char* buffer, const size_t element_cnt) -{ - const T* vals = reinterpret_cast(buffer); - - str += "[ "; - for (size_t i = 0; i < element_cnt; ++i) { - const T& v = vals[i]; - if (i != 0) { - str += ", "; - } - str += std::to_string(v); - } - - str += " ]"; - - return nullptr; // success -} - -} // namespace - - -TRITONSERVER_Error* -BufferAsTypedString( - std::string& str, const char* buffer, size_t buffer_byte_size, - TRITONSERVER_DataType datatype) -{ - const size_t element_cnt = - buffer_byte_size / TRITONSERVER_DataTypeByteSize(datatype); - - switch (datatype) { - case TRITONSERVER_TYPE_UINT8: - return BufferAsTypedString(str, buffer, element_cnt); - case TRITONSERVER_TYPE_UINT16: - return BufferAsTypedString(str, buffer, element_cnt); - case 
TRITONSERVER_TYPE_UINT32:
-      return BufferAsTypedString<uint32_t>(str, buffer, element_cnt);
-    case TRITONSERVER_TYPE_UINT64:
-      return BufferAsTypedString<uint64_t>(str, buffer, element_cnt);
-
-    case TRITONSERVER_TYPE_INT8:
-      return BufferAsTypedString<int8_t>(str, buffer, element_cnt);
-    case TRITONSERVER_TYPE_INT16:
-      return BufferAsTypedString<int16_t>(str, buffer, element_cnt);
-    case TRITONSERVER_TYPE_INT32:
-      return BufferAsTypedString<int32_t>(str, buffer, element_cnt);
-    case TRITONSERVER_TYPE_INT64:
-      return BufferAsTypedString<int64_t>(str, buffer, element_cnt);
-
-    case TRITONSERVER_TYPE_FP32:
-      return BufferAsTypedString<float>(str, buffer, element_cnt);
-    case TRITONSERVER_TYPE_FP64:
-      return BufferAsTypedString<double>(str, buffer, element_cnt);
-
-    default:
-      return TRITONSERVER_ErrorNew(
-          TRITONSERVER_ERROR_INVALID_ARG,
-          std::string(
-              std::string("class result not available for output due to "
-                          "unsupported type '") +
-              std::string(TRITONSERVER_DataTypeString(datatype)) + "'")
-              .c_str());
-  }
-
-  return nullptr;  // success
-}
-
-std::string
-GetRequestId(TRITONBACKEND_Request* request)
-{
-  const char* request_id = nullptr;
-  LOG_IF_ERROR(
-      TRITONBACKEND_RequestId(request, &request_id),
-      "unable to retrieve request ID string");
-  if ((request_id == nullptr) || (request_id[0] == '\0')) {
-    request_id = "";
-  }
-  return std::string("[request id: ") + request_id + "] ";
-}
-
-}}  // namespace triton::backend
diff --git a/3rdparty/backend-r22.12/src/backend_input_collector.cc b/3rdparty/backend-r22.12/src/backend_input_collector.cc
deleted file mode 100644
index a6f0cebd7921fcfbb5ce0e820c2711d587a059dd..0000000000000000000000000000000000000000
--- a/3rdparty/backend-r22.12/src/backend_input_collector.cc
+++ /dev/null
@@ -1,1310 +0,0 @@
-// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//   * Redistributions of source code must retain the above copyright
-//     notice, this list of conditions and the following disclaimer.
-//   * Redistributions in binary form must reproduce the above copyright
-//     notice, this list of conditions and the following disclaimer in the
-//     documentation and/or other materials provided with the distribution.
-//   * Neither the name of NVIDIA CORPORATION nor the names of its
-//     contributors may be used to endorse or promote products derived
-//     from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
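
The file deleted below implements `BackendInputCollector`, the helper that gathers an input's possibly non-contiguous, per-request buffers into a single contiguous backend-owned destination buffer, staging through pinned memory or a gather kernel when that is faster. As a rough usage sketch only (the `collector` argument, the `GatherExampleInput` wrapper, and the tensor name `INPUT0` are illustrative assumptions; only the `ProcessTensor` and `Finalize` signatures visible in this revision are relied on):

```cpp
#include "triton/backend/backend_input_collector.h"

// Illustrative sketch: drive the collector for one input tensor of a batch.
// "collector" is assumed to have been constructed for the current set of
// requests/responses and the instance's CUDA stream.
void
GatherExampleInput(
    triton::backend::BackendInputCollector& collector, char* dst_buffer,
    const size_t dst_byte_size)
{
  // Copy every request's "INPUT0" buffers (host, pinned, or device memory)
  // into the contiguous GPU destination buffer.
  collector.ProcessTensor(
      "INPUT0", dst_buffer, dst_byte_size, TRITONSERVER_MEMORY_GPU,
      0 /* memory_type_id */);

  // Finalize() flushes any deferred pinned-memory staging copies and returns
  // true if asynchronous CUDA copies were issued, in which case the backend
  // must synchronize the stream before reading the gathered buffer.
  if (collector.Finalize()) {
    // e.g. cudaStreamSynchronize(stream) before running inference.
  }
}
```

Deferring the staging copies until `Finalize` lets several inputs share a single synchronization point instead of syncing once per tensor.
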
- -#include "triton/backend/backend_input_collector.h" - -#include -#include "triton/backend/backend_common.h" -#ifdef TRITON_ENABLE_GPU -#include "kernel.h" -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace backend { - -// -// BackendInputCollector::InputIterator -// - -BackendInputCollector::InputIterator::InputIterator( - TRITONBACKEND_Request** requests, const uint32_t request_count, - std::vector* responses, const char* input_name, - const char* host_policy_name, const bool coalesce_request_input) - : requests_(requests), request_count_(request_count), responses_(responses), - input_name_(input_name), host_policy_(host_policy_name), - coalesce_request_input_(coalesce_request_input), curr_request_idx_(0), - curr_buffer_idx_(0), reach_end_(false) -{ - auto& response = (*responses_)[curr_request_idx_]; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_RequestInput( - requests_[curr_request_idx_], input_name_, &curr_input_)); - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_InputPropertiesForHostPolicy( - curr_input_, host_policy_, nullptr, nullptr, nullptr, - nullptr, nullptr, &curr_buffer_cnt_)); -} - -bool -BackendInputCollector::InputIterator::GetNextContiguousInput( - ContiguousBuffer* input) -{ - if (reach_end_ || (curr_buffer_idx_ >= curr_buffer_cnt_)) { - return false; - } - - // Get the first buffer - TRITONBACKEND_InputBufferForHostPolicy( - curr_input_, host_policy_, curr_buffer_idx_, - reinterpret_cast(&input->memory_desc_.buffer_), - &input->memory_desc_.byte_size_, &input->memory_desc_.memory_type_, - &input->memory_desc_.memory_type_id_); - ++curr_buffer_idx_; - input->start_request_idx_ = curr_request_idx_; - input->end_request_idx_ = curr_request_idx_; - if (!coalesce_request_input_) { - if (curr_buffer_idx_ >= curr_buffer_cnt_) { - ++curr_request_idx_; - if (curr_request_idx_ < request_count_) { - auto& response = (*responses_)[curr_request_idx_]; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONBACKEND_RequestInput( - requests_[curr_request_idx_], input_name_, &curr_input_)); - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_InputPropertiesForHostPolicy( - curr_input_, host_policy_, nullptr, nullptr, nullptr, - nullptr, nullptr, &curr_buffer_cnt_)); - // reset buffer idx - curr_buffer_idx_ = 0; - } else { - reach_end_ = true; - } - } - return true; - } - - do { - for (; curr_buffer_idx_ < curr_buffer_cnt_; ++curr_buffer_idx_) { - const void* next_buffer; - size_t next_buffer_byte_size; - TRITONSERVER_MemoryType next_memory_type; - int64_t next_memory_type_id; - TRITONBACKEND_InputBufferForHostPolicy( - curr_input_, host_policy_, curr_buffer_idx_, &next_buffer, - &next_buffer_byte_size, &next_memory_type, &next_memory_type_id); - if (((input->memory_desc_.buffer_ + input->memory_desc_.byte_size_) != - next_buffer) || - (input->memory_desc_.memory_type_ != next_memory_type) || - (input->memory_desc_.memory_type_id_ != next_memory_type_id)) { - return true; - } - input->memory_desc_.byte_size_ += next_buffer_byte_size; - input->end_request_idx_ = curr_request_idx_; - } - // Iterated all buffers for current request, check next - ++curr_request_idx_; - if (curr_request_idx_ < request_count_) { - auto& response = (*responses_)[curr_request_idx_]; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONBACKEND_RequestInput( - requests_[curr_request_idx_], input_name_, &curr_input_)); - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_InputPropertiesForHostPolicy( - curr_input_, host_policy_, nullptr, nullptr, nullptr, - 
nullptr, nullptr, &curr_buffer_cnt_)); - // reset buffer idx - curr_buffer_idx_ = 0; - } - } while (curr_request_idx_ < request_count_); - reach_end_ = true; - return true; -} - -// -// BackendInputCollector -// - -bool -BackendInputCollector::GetInputBufferIfContiguous( - const char* input_name, const char** buffer, size_t* buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - *buffer = nullptr; - *buffer_byte_size = 0; - const char* expected_next_buffer = nullptr; - bool contiguous = true; - for (size_t idx = 0; idx < request_count_; idx++) { - auto& request = requests_[idx]; - auto& response = (*responses_)[idx]; - - TRITONBACKEND_Input* input; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_RequestInput(request, input_name, &input)); - uint64_t byte_size; - uint32_t buffer_count; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, nullptr, - nullptr, &byte_size, &buffer_count)); - for (size_t idx = 0; idx < buffer_count; ++idx) { - const void* src_buffer; - size_t src_byte_size; - TRITONSERVER_MemoryType src_memory_type; - int64_t src_memory_type_id; - - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONBACKEND_InputBufferForHostPolicy( - input, host_policy_cstr_, idx, &src_buffer, &src_byte_size, - &src_memory_type, &src_memory_type_id)); - if (*buffer != nullptr) { - // If have seen the second buffer while coalescing input is not - // requested, treat the inputs are not contiguous - if (coalesce_request_input_ && (expected_next_buffer == src_buffer) && - (*memory_type == src_memory_type) && - (*memory_type_id == src_memory_type_id)) { - expected_next_buffer += src_byte_size; - } else { - contiguous = false; - } - // Want to know total buffer byte size even if it is not contiguous - *buffer_byte_size += src_byte_size; - } else { - *buffer = reinterpret_cast(src_buffer); - *memory_type = src_memory_type; - *memory_type_id = src_memory_type_id; - *buffer_byte_size = src_byte_size; - expected_next_buffer = *buffer + src_byte_size; - } - } - } - return contiguous; -} - -void -BackendInputCollector::ProcessTensor( - const char* input_name, char* buffer, const size_t buffer_byte_size, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id) -{ - // A value of CPU_PINNED indicates that pinned memory buffer is not - // needed for this tensor. Any other value indicates that a pinned - // memory buffer is needed when the target memory type matches - // 'use_pinned_memory_type'. - TRITONSERVER_MemoryType use_pinned_memory_type = - TRITONSERVER_MEMORY_CPU_PINNED; - if (pinned_enabled_) { - use_pinned_memory_type = GetUsePinnedMemoryType(memory_type); - } - const bool use_kernel = (kernel_buffer_threshold_ != 0); - - size_t buffer_offset = 0; - - InputIterator ii( - requests_, request_count_, responses_, input_name, host_policy_cstr_, - coalesce_request_input_); - ContiguousBuffer input; - while (ii.GetNextContiguousInput(&input)) { - // If there are pending copies from tensor buffer that is not - // contiguous with 'response's part of that buffer, then need to - // go ahead and perform the pending copies so that can start a new - // contiguous region if necessary. 
- if ((pending_pinned_byte_size_ > 0) && - (buffer_offset != - (pending_pinned_byte_size_ + pending_pinned_offset_))) { - need_sync_ |= FlushPendingPinned( - buffer, buffer_byte_size, memory_type, memory_type_id); - } - if ((pending_copy_kernel_buffer_byte_size_ > 0) && - (buffer_offset != (pending_copy_kernel_buffer_byte_size_ + - pending_copy_kernel_buffer_offset_))) { - need_sync_ |= FlushPendingCopyKernel( - buffer, buffer_byte_size, memory_type, memory_type_id); - } - - need_sync_ |= SetInputTensor( - input_name, input, buffer, buffer_byte_size, memory_type, - memory_type_id, buffer_offset, use_pinned_memory_type, use_kernel, - true); - - buffer_offset += input.memory_desc_.byte_size_; - } - - // Done with the tensor, flush any pending pinned copies. - need_sync_ |= - FlushPendingPinned(buffer, buffer_byte_size, memory_type, memory_type_id); - need_sync_ |= FlushPendingCopyKernel( - buffer, buffer_byte_size, memory_type, memory_type_id); -#ifdef TRITON_ENABLE_GPU - if (need_sync_ && (event_ != nullptr)) { - cudaEventRecord(event_, stream_); - } -#endif // TRITON_ENABLE_GPU -} - -TRITONSERVER_Error* -BackendInputCollector::ProcessTensor( - const char* input_name, char* buffer, const size_t buffer_byte_size, - const std::vector>& - allowed_input_types, - const char** dst_buffer, size_t* dst_buffer_byte_size, - TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id) -{ - if (buffer == nullptr) { - if (allowed_input_types.size() == 0) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "'allowed_input_types' must contain at least one pair of memory type " - "and id"); - } - if (GetInputBufferIfContiguous( - input_name, dst_buffer, dst_buffer_byte_size, dst_memory_type, - dst_memory_type_id)) { - // zero size buffer will be treated as contiguous as well, - // but we want to invoke backend memory to have a valid address. 
- if (*dst_buffer_byte_size != 0) { - // If the buffer is contiguous, check if the caller expects its type - for (const auto& allowed_type : allowed_input_types) { - if ((*dst_memory_type == allowed_type.first) && - ((*dst_memory_type_id == allowed_type.second))) { - return nullptr; // success - } - } - } - } - // A separate buffer is needed - BackendMemory* backend_memory = nullptr; - for (const auto& allowed_type : allowed_input_types) { - std::vector alloc_types; - const int64_t memory_type_id = allowed_type.second; - switch (allowed_type.first) { - case TRITONSERVER_MEMORY_GPU: - alloc_types = {BackendMemory::AllocationType::GPU_POOL, - BackendMemory::AllocationType::GPU}; - break; - case TRITONSERVER_MEMORY_CPU_PINNED: - alloc_types = {BackendMemory::AllocationType::CPU_PINNED_POOL, - BackendMemory::AllocationType::CPU_PINNED}; - break; - case TRITONSERVER_MEMORY_CPU: - alloc_types = {BackendMemory::AllocationType::CPU}; - break; - } - auto err = BackendMemory::Create( - memory_manager_, alloc_types, memory_type_id, *dst_buffer_byte_size, - &backend_memory); - if (err != nullptr) { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("unable to create backend memory for type: ") + - TRITONSERVER_MemoryTypeString(allowed_type.first) + - " id: " + std::to_string(memory_type_id) + ": " + - TRITONSERVER_ErrorMessage(err)) - .c_str()); - TRITONSERVER_ErrorDelete(err); - } else { - in_use_memories_.emplace_back(backend_memory); - break; - } - } - if (backend_memory == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("failed to allocate contiguous buffer for input '") + - input_name + "'") - .c_str()); - } - buffer = backend_memory->MemoryPtr(); - *dst_buffer = backend_memory->MemoryPtr(); - *dst_buffer_byte_size = backend_memory->ByteSize(); - *dst_memory_type = backend_memory->MemoryType(); - *dst_memory_type_id = backend_memory->MemoryTypeId(); - } else { - if (allowed_input_types.size() != 1) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "'allowed_input_types' must only contain the memory type and id of " - "'buffer'"); - } - *dst_buffer = buffer; - *dst_buffer_byte_size = buffer_byte_size; - *dst_memory_type = allowed_input_types[0].first; - *dst_memory_type_id = allowed_input_types[0].second; - } - if (*dst_buffer_byte_size != 0) { - ProcessTensor( - input_name, buffer, *dst_buffer_byte_size, *dst_memory_type, - *dst_memory_type_id); - } - return nullptr; // success -} - -bool -BackendInputCollector::Finalize() -{ -#ifdef TRITON_ENABLE_GPU - if ((!deferred_pinned_.empty()) && need_sync_) { - if (event_ != nullptr) { - cudaEventSynchronize(event_); - } else { - cudaStreamSynchronize(stream_); - } - need_sync_ = false; - } -#endif // TRITON_ENABLE_GPU - - // After the above sync all the GPU->pinned copies are complete. Any - // deferred copies of pinned->CPU can now be done. 
-#ifdef TRITON_ENABLE_GPU - if (buffer_ready_event_ != nullptr) { - cudaEventSynchronize(buffer_ready_event_); - buffer_ready_event_ = nullptr; - } -#endif // TRITON_ENABLE_GPU - for (auto& def : deferred_pinned_) { - if (!def.finalized_) { - need_sync_ |= def.Finalize(stream_); - } - } - for (size_t i = 0; i < async_task_count_; i++) { - need_sync_ |= completion_queue_.Get(); - } - -#ifdef TRITON_ENABLE_GPU - // Record the new event location if deferred copies occur - if ((!deferred_pinned_.empty()) && need_sync_ && (event_ != nullptr)) { - cudaEventRecord(event_, stream_); - } -#endif // TRITON_ENABLE_GPU - - return need_sync_; -} - -bool -BackendInputCollector::DeferredPinned::Finalize(cudaStream_t stream) -{ - bool cuda_used = false; - auto err = CopyBuffer( - "pinned buffer", TRITONSERVER_MEMORY_CPU_PINNED, 0, tensor_memory_type_, - tensor_memory_id_, pinned_memory_size_, pinned_memory_, - tensor_buffer_ + tensor_buffer_offset_, stream, &cuda_used); - - // If something goes wrong with the copy all the pending - // responses fail... - if (err != nullptr) { - for (auto& pr : requests_) { - for (size_t idx = pr.start_request_idx_; idx <= pr.end_request_idx_; - ++idx) { - if ((*responses_)[idx] != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - (*responses_)[idx], TRITONSERVER_RESPONSE_COMPLETE_FINAL, - err), - "failed to send error response"); - (*responses_)[idx] = nullptr; - } - } - } - TRITONSERVER_ErrorDelete(err); - } - return cuda_used; -} - -bool -BackendInputCollector::SetInputTensor( - const char* input_name, const ContiguousBuffer& input, char* tensor_buffer, - const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id, const size_t tensor_buffer_offset, - const TRITONSERVER_MemoryType use_pinned_memory_type, const bool use_kernel, - const bool wait_buffer) -{ - bool cuda_copy = false; - - if ((tensor_buffer_offset + input.memory_desc_.byte_size_) > - tensor_buffer_byte_size) { - for (size_t i = input.start_request_idx_; i <= input.end_request_idx_; - ++i) { - RESPOND_AND_SET_NULL_IF_ERROR( - &(*responses_)[i], - TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "unexpected total byte size " + - std::to_string( - tensor_buffer_offset + input.memory_desc_.byte_size_) + - " for input '" + input_name + "', expecting " + - std::to_string(tensor_buffer_byte_size)) - .c_str())); - } - return cuda_copy; - } - - // If the request buffer matches the memory type that should use an - // intermediate pinned memory buffer for the transfer, then just - // record the input as pending and increase the size required for - // the intermediate pinned buffer. We only do this check for the - // first buffer of an input and apply the same policy for all - // buffers. So if an inputs data is split over different memory - // types this may not be ideal but that should be a very rare - // situation. - if ((use_pinned_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) && - (input.memory_desc_.memory_type_ == use_pinned_memory_type)) { - if (pending_pinned_byte_size_ == 0) { - pending_pinned_offset_ = tensor_buffer_offset; - } - - pending_pinned_byte_size_ += input.memory_desc_.byte_size_; - pending_pinned_input_buffers_.push_back(input); - return cuda_copy; - } - // [FIXME] support other direction if prove to be faster, all kernel - // handling code in this class asssumes the destination buffer is on device - // If the request buffer and the destination buffer are accessible by all - // GPUs (i.e. 
pinned, device), initiate the copy via copy CUDA kernel. - // We only do this check for the - // first buffer of an input and apply the same policy for all - // buffers. So if an inputs data is split over different memory - // types this may not be ideal but that should be a very rare - // situation. - // Currently checked direction: - // pinned -> device - // same device -> device - // different device -> device - if (use_kernel && - (input.memory_desc_.memory_type_ != TRITONSERVER_MEMORY_CPU) && - (tensor_memory_type == TRITONSERVER_MEMORY_GPU)) { - // [FIXME] Currently not allowing copy between devices as it requires - // peer-to-peer access to be enabled. Peer-to-peer is enabled by default, - // but server can still runs even if it fails to enable peer-to-peer. - // Should provide a utility to check whether a device pair allows direct - // access and use gather kernel accordingly - if ((input.memory_desc_.memory_type_ != TRITONSERVER_MEMORY_GPU) || - (input.memory_desc_.memory_type_id_ == tensor_memory_type_id)) { - if (pending_copy_kernel_buffer_byte_size_ == 0) { - pending_copy_kernel_buffer_offset_ = tensor_buffer_offset; - } - - pending_copy_kernel_buffer_byte_size_ += input.memory_desc_.byte_size_; - ++pending_copy_kernel_input_buffer_counts_; - pending_copy_kernel_input_buffers_.push_back(input); - return cuda_copy; - } - } - -#ifdef TRITON_ENABLE_GPU - if (wait_buffer && (buffer_ready_event_ != nullptr)) { - cudaEventSynchronize(buffer_ready_event_); - buffer_ready_event_ = nullptr; - } -#endif // TRITON_ENABLE_GPU - - // Direct copy without intermediate pinned memory. - bool cuda_used = false; - auto err = CopyBuffer( - input_name, input.memory_desc_.memory_type_, - input.memory_desc_.memory_type_id_, tensor_memory_type, - tensor_memory_type_id, input.memory_desc_.byte_size_, - input.memory_desc_.buffer_, tensor_buffer + tensor_buffer_offset, stream_, - &cuda_used, copy_on_stream_); - if (err != nullptr) { - for (size_t i = input.start_request_idx_; i <= input.end_request_idx_; - ++i) { - RESPOND_AND_SET_NULL_IF_ERROR( - &(*responses_)[i], - TRITONSERVER_ErrorNew( - TRITONSERVER_ErrorCode(err), TRITONSERVER_ErrorMessage(err))); - } - TRITONSERVER_ErrorDelete(err); - } - cuda_copy |= cuda_used; - return cuda_copy; -} - -bool -BackendInputCollector::FlushPendingPinned( - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id) -{ - bool cuda_copy = false; - - // Will be copying from CPU->pinned->GPU or GPU->pinned->CPU - - // Attempt to allocate a pinned buffer to use for staging the - // copy... if we fail to allocated the pinned buffer then we just - // directly go CPU->GPU or GPU->CPU. - char* pinned_memory = nullptr; - int64_t pinned_memory_type_id = 0; - TRITONSERVER_MemoryType pinned_memory_type; - BackendMemory* backend_memory; - if (pending_pinned_byte_size_ > 0) { - TRITONSERVER_Error* err = BackendMemory::Create( - memory_manager_, - {BackendMemory::AllocationType::CPU_PINNED_POOL, - BackendMemory::AllocationType::CPU_PINNED}, - 0 /* memory_type_id */, pending_pinned_byte_size_, &backend_memory); - if (err != nullptr) { - TRITONSERVER_ErrorDelete(err); - } else { - pinned_memory = backend_memory->MemoryPtr(); - pinned_memory_type = backend_memory->MemoryType(); - pinned_memory_type_id = backend_memory->MemoryTypeId(); - } - } - - // If the pinned buffer wasn't actually allocated then just perform - // a direct copy. 
- if (pinned_memory == nullptr) { - size_t offset = 0; - for (auto& pr : pending_pinned_input_buffers_) { - cuda_copy |= SetInputTensor( - "pinned fallback", pr, tensor_buffer, tensor_buffer_byte_size, - tensor_memory_type, tensor_memory_type_id, - pending_pinned_offset_ + offset, TRITONSERVER_MEMORY_CPU_PINNED, - false, true); - offset += pr.memory_desc_.byte_size_; - } - } - // We have a pinned buffer so copy the pending input buffer(s) into - // the pinned memory. - else { // pinned_memory_type == TRITONSERVER_MEMORY_CPU_PINNED - bool cuda_used = false; - size_t offset = 0; - if (!use_async_cpu_copy_) { - for (auto& pr : pending_pinned_input_buffers_) { - cuda_used |= SetInputTensor( - "pinned H2H", pr, pinned_memory, pending_pinned_byte_size_, - TRITONSERVER_MEMORY_CPU_PINNED, 0 /* memory_type_id */, offset, - TRITONSERVER_MEMORY_CPU_PINNED, false, true); - offset += pr.memory_desc_.byte_size_; - } - - cuda_copy |= cuda_used; - - // If the copy was not async (i.e. if request input was in CPU so - // a CPU->CPU-PINNED copy was performed above), then the pinned - // buffer now holds the tensor contents and we can immediately - // issue the copies from the pinned buffer to the tensor. - // - // Otherwise the GPU->CPU-PINNED async copies are in flight and we - // simply remember the pinned buffer and the corresponding - // request inputs so that we can do the pinned->CPU copies in - // finalize after we have waited for all async copies to complete. - if (!cuda_used) { -#ifdef TRITON_ENABLE_GPU - if (buffer_ready_event_ != nullptr) { - cudaEventSynchronize(buffer_ready_event_); - buffer_ready_event_ = nullptr; - } -#endif // TRITON_ENABLE_GPU - auto err = CopyBuffer( - "pinned input buffer H2D", TRITONSERVER_MEMORY_CPU_PINNED, - 0 /* memory_type_id */, tensor_memory_type, tensor_memory_type_id, - pending_pinned_byte_size_, pinned_memory, - tensor_buffer + pending_pinned_offset_, stream_, &cuda_used, - copy_on_stream_); - cuda_copy |= cuda_used; - - // If something goes wrong with the copy all the pending - // responses fail... 
- if (err != nullptr) { - for (auto& pr : pending_pinned_input_buffers_) { - for (size_t idx = pr.start_request_idx_; idx <= pr.end_request_idx_; - ++idx) { - if ((*responses_)[idx] != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - (*responses_)[idx], - TRITONSERVER_RESPONSE_COMPLETE_FINAL, err), - "failed to send error response"); - (*responses_)[idx] = nullptr; - } - } - } - TRITONSERVER_ErrorDelete(err); - } - } else { // cuda_used - deferred_pinned_.emplace_back( - pinned_memory, pending_pinned_byte_size_, tensor_buffer, - pending_pinned_offset_, tensor_memory_type, tensor_memory_type_id, - std::move(pending_pinned_input_buffers_), responses_); - } - } else { - async_task_count_++; - deferred_pinned_.emplace_back( - pinned_memory, pending_pinned_byte_size_, tensor_buffer, - pending_pinned_offset_, tensor_memory_type, tensor_memory_type_id, - std::move(pending_pinned_input_buffers_), responses_); - auto& deferred_pinned = deferred_pinned_.back(); - // Mark finalized to avoid duplicated call to DeferredPinned::Finalized() - // in BackendInputCollector::Finalize() - deferred_pinned_.back().finalized_ = true; - auto incomplete_count = new std::atomic(std::min( - deferred_pinned_.back().requests_.size(), - triton::common::AsyncWorkQueue::WorkerCount())); - auto pending_pinned_byte_size = pending_pinned_byte_size_; - size_t stride = (deferred_pinned_.back().requests_.size() + - triton::common::AsyncWorkQueue::WorkerCount() - 1) / - triton::common::AsyncWorkQueue::WorkerCount(); - auto pending_it = deferred_pinned_.back().requests_.begin(); - while (pending_it != deferred_pinned_.back().requests_.end()) { - auto end_it = pending_it; - auto next_offset = offset; - for (size_t idx = 0; idx < stride; idx++) { - next_offset += end_it->memory_desc_.byte_size_; - end_it++; - if (end_it == deferred_pinned_.back().requests_.end()) { - break; - } - } - - auto err = - CommonErrorToTritonError(triton::common::AsyncWorkQueue::AddTask( - [this, offset, pinned_memory, pinned_memory_type, - pending_pinned_byte_size, pinned_memory_type_id, pending_it, - end_it, incomplete_count, &deferred_pinned]() mutable { - for (; pending_it != end_it; pending_it++) { - SetInputTensor( - "pinned async H2H", *pending_it, pinned_memory, - pending_pinned_byte_size, pinned_memory_type, - pinned_memory_type_id, offset, - TRITONSERVER_MEMORY_CPU_PINNED, false, false); - offset += pending_it->memory_desc_.byte_size_; - } - // The last segmented task will start the next phase of - // the internal pinned buffer copy - if (incomplete_count->fetch_sub(1) == 1) { -#ifdef TRITON_ENABLE_GPU - if (buffer_ready_event_ != nullptr) { - cudaEventSynchronize(buffer_ready_event_); - buffer_ready_event_ = nullptr; - } -#endif // TRITON_ENABLE_GPU - completion_queue_.Put(deferred_pinned.Finalize(stream_)); - delete incomplete_count; - } - })); - if (err != nullptr) { - for (; pending_it != end_it; pending_it++) { - for (size_t idx = pending_it->start_request_idx_; - idx <= pending_it->end_request_idx_; ++idx) { - if ((*responses_)[idx] != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - (*responses_)[idx], - TRITONSERVER_RESPONSE_COMPLETE_FINAL, err), - "failed to send error response"); - (*responses_)[idx] = nullptr; - } - } - } - } - TRITONSERVER_ErrorDelete(err); - - offset = next_offset; - pending_it = end_it; - } - } - } - - // Pending pinned copies are handled... 
- pending_pinned_byte_size_ = 0; - pending_pinned_offset_ = 0; - pending_pinned_input_buffers_.clear(); - - // Need to hold on to the allocated pinned buffer as there are still - // copies in flight. Will delete it in finalize. - if (pinned_memory != nullptr) { - in_use_memories_.emplace_back(backend_memory); - } - - return cuda_copy; -} - -TRITONSERVER_Error* -BackendInputCollector::BatchInputShape( - const BatchInput& batch_input, std::vector* shape) -{ - *shape = std::vector{0}; - switch (batch_input.BatchInputKind()) { - case BatchInput::Kind::BATCH_ELEMENT_COUNT: - case BatchInput::Kind::BATCH_ACCUMULATED_ELEMENT_COUNT: { - (*shape)[0] = request_count_; - break; - } - case BatchInput::Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO: { - (*shape)[0] = request_count_ + 1; - break; - } - case BatchInput::Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE: { - const auto& source_input = batch_input.SourceInputs()[0]; - for (size_t req_idx = 0; req_idx < request_count_; req_idx++) { - TRITONBACKEND_Input* input; - RETURN_IF_ERROR(TRITONBACKEND_RequestInput( - requests_[req_idx], source_input.c_str(), &input)); - const int64_t* shape_arr; - uint32_t dims_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, &shape_arr, &dims_count, - nullptr, nullptr)); - (*shape)[0] = - std::max((*shape)[0], GetElementCount(shape_arr, dims_count)); - } - break; - } - case BatchInput::Kind::BATCH_ITEM_SHAPE: { - shape->emplace_back(0); - const auto& source_input = batch_input.SourceInputs()[0]; - for (size_t req_idx = 0; req_idx < request_count_; req_idx++) { - TRITONBACKEND_Input* input; - RETURN_IF_ERROR(TRITONBACKEND_RequestInput( - requests_[req_idx], source_input.c_str(), &input)); - const int64_t* shape_arr; - uint32_t dims_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, &shape_arr, &dims_count, - nullptr, nullptr)); - // Assuming first dimension is batch size and ragged input is only set - // for batching enabled model. - (*shape)[0] += shape_arr[0]; - // The batch input tracks the shape without batch dimension for - // each batch item - (*shape)[1] = (dims_count - 1); - } - break; - } - case BatchInput::Kind::BATCH_ITEM_SHAPE_FLATTEN: { - const auto& source_input = batch_input.SourceInputs()[0]; - for (size_t req_idx = 0; req_idx < request_count_; req_idx++) { - TRITONBACKEND_Input* input; - RETURN_IF_ERROR(TRITONBACKEND_RequestInput( - requests_[req_idx], source_input.c_str(), &input)); - const int64_t* shape_arr; - uint32_t dims_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, &shape_arr, &dims_count, - nullptr, nullptr)); - // Assuming first dimension is batch size and ragged input is only set - // for batching enabled model. 
- // The batch input tracks the shape without batch dimension for - // each batch item - (*shape)[0] += (shape_arr[0] * (dims_count - 1)); - } - break; - } - default: - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "unsupported BatchInputKind received"); - } - return nullptr; // success -} - -TRITONSERVER_Error* -BackendInputCollector::ProcessBatchInput( - const BatchInput& batch_input, char* buffer, const size_t buffer_byte_size, - const std::vector>& - allowed_input_types, - const char** dst_buffer, size_t* dst_buffer_byte_size, - TRITONSERVER_MemoryType* dst_memory_type, int64_t* dst_memory_type_id) -{ -#ifdef TRITON_ENABLE_GPU - if (buffer_ready_event_ != nullptr) { - cudaEventSynchronize(buffer_ready_event_); - buffer_ready_event_ = nullptr; - } -#endif // TRITON_ENABLE_GPU - if (buffer == nullptr) { - if (allowed_input_types.size() == 0) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "'allowed_input_types' must contain at least one pair of memory type " - "and id"); - } - // Calculate the byte size of the buffer - std::vector shape; - RETURN_IF_ERROR(BatchInputShape(batch_input, &shape)); - *dst_buffer_byte_size = GetByteSize(batch_input.DataType(), shape); - BackendMemory* backend_memory = nullptr; - for (const auto& allowed_type : allowed_input_types) { - std::vector alloc_types; - const int64_t memory_type_id = allowed_type.second; - switch (allowed_type.first) { - case TRITONSERVER_MEMORY_GPU: - alloc_types = {BackendMemory::AllocationType::GPU_POOL, - BackendMemory::AllocationType::GPU}; - break; - case TRITONSERVER_MEMORY_CPU_PINNED: - alloc_types = {BackendMemory::AllocationType::CPU_PINNED_POOL, - BackendMemory::AllocationType::CPU_PINNED}; - break; - case TRITONSERVER_MEMORY_CPU: - alloc_types = {BackendMemory::AllocationType::CPU}; - break; - } - auto err = BackendMemory::Create( - memory_manager_, alloc_types, memory_type_id, *dst_buffer_byte_size, - &backend_memory); - if (err != nullptr) { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("unable to create backend memory for type: ") + - TRITONSERVER_MemoryTypeString(allowed_type.first) + - " id: " + std::to_string(memory_type_id) + ": " + - TRITONSERVER_ErrorMessage(err)) - .c_str()); - TRITONSERVER_ErrorDelete(err); - } else { - in_use_memories_.emplace_back(backend_memory); - break; - } - } - if (backend_memory == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string( - "failed to allocate contiguous buffer for batch input '") + - batch_input.TargetNames()[0] + "'") - .c_str()); - } - buffer = backend_memory->MemoryPtr(); - *dst_buffer = backend_memory->MemoryPtr(); - *dst_buffer_byte_size = backend_memory->ByteSize(); - *dst_memory_type = backend_memory->MemoryType(); - *dst_memory_type_id = backend_memory->MemoryTypeId(); - } else { - if (allowed_input_types.size() != 1) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "'allowed_input_types' must only contain the memory type and id of " - "'buffer'"); - } - *dst_buffer = buffer; - *dst_buffer_byte_size = buffer_byte_size; - *dst_memory_type = allowed_input_types[0].first; - *dst_memory_type_id = allowed_input_types[0].second; - } - - char* input_buffer = buffer; - std::unique_ptr internal_buffer; - // Need a CPU buffer for modifying the value - if (*dst_memory_type == TRITONSERVER_MEMORY_GPU) { - BackendMemory* ib = nullptr; - RETURN_IF_ERROR(BackendMemory::Create( - memory_manager_, - {BackendMemory::AllocationType::CPU_PINNED_POOL, - BackendMemory::AllocationType::CPU}, 
- 0, *dst_buffer_byte_size, &ib)); - internal_buffer.reset(ib); - input_buffer = internal_buffer->MemoryPtr(); - } - const auto& data_type = batch_input.DataType(); - switch (batch_input.BatchInputKind()) { - case BatchInput::Kind::BATCH_ELEMENT_COUNT: { - const auto& source_input = batch_input.SourceInputs()[0]; - if (data_type == TRITONSERVER_TYPE_FP32) { - RETURN_IF_ERROR(SetElementCount( - source_input, input_buffer, *dst_buffer_byte_size)); - } else { - RETURN_IF_ERROR(SetElementCount( - source_input, input_buffer, *dst_buffer_byte_size)); - } - break; - } - case BatchInput::Kind::BATCH_ACCUMULATED_ELEMENT_COUNT: { - const auto& source_input = batch_input.SourceInputs()[0]; - if (data_type == TRITONSERVER_TYPE_FP32) { - RETURN_IF_ERROR(SetAccumulatedElementCount( - source_input, input_buffer, *dst_buffer_byte_size)); - } else { - RETURN_IF_ERROR(SetAccumulatedElementCount( - source_input, input_buffer, *dst_buffer_byte_size)); - } - break; - } - case BatchInput::Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO: { - const auto& source_input = batch_input.SourceInputs()[0]; - if (data_type == TRITONSERVER_TYPE_FP32) { - *reinterpret_cast(input_buffer) = 0; - RETURN_IF_ERROR(SetAccumulatedElementCount( - source_input, input_buffer + sizeof(float), - *dst_buffer_byte_size - sizeof(float))); - } else { - *reinterpret_cast(input_buffer) = 0; - RETURN_IF_ERROR(SetAccumulatedElementCount( - source_input, input_buffer + sizeof(int32_t), - *dst_buffer_byte_size - sizeof(int32_t))); - } - break; - } - case BatchInput::Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE: { - // The batch input is described by the shape, - // no data modification is needed - return nullptr; // success - } - case BatchInput::Kind::BATCH_ITEM_SHAPE: - case BatchInput::Kind::BATCH_ITEM_SHAPE_FLATTEN: { - // Use the same utilities for both types as the data will be the same, - // only difference is the shape of the tensor. 
- const auto& source_input = batch_input.SourceInputs()[0]; - if (data_type == TRITONSERVER_TYPE_FP32) { - *reinterpret_cast(input_buffer) = 0; - RETURN_IF_ERROR(SetBatchItemShape( - source_input, input_buffer, *dst_buffer_byte_size)); - } else { - *reinterpret_cast(input_buffer) = 0; - RETURN_IF_ERROR(SetBatchItemShape( - source_input, input_buffer, *dst_buffer_byte_size)); - } - break; - } - } - if (*dst_memory_type == TRITONSERVER_MEMORY_GPU) { - bool cuda_used; - RETURN_IF_ERROR(CopyBuffer( - "batch input buffer", internal_buffer->MemoryType(), - internal_buffer->MemoryTypeId(), *dst_memory_type, *dst_memory_type_id, - *dst_buffer_byte_size, input_buffer, buffer, stream_, &cuda_used, - copy_on_stream_)); - // Need to keep the backend memory alive in the case of async copy - in_use_memories_.emplace_back(std::move(internal_buffer)); - need_sync_ |= cuda_used; - } - return nullptr; // success -} - -template -TRITONSERVER_Error* -BackendInputCollector::SetElementCount( - const std::string& source_input, char* buffer, - const size_t buffer_byte_size) -{ - size_t buffer_offset = 0; - for (size_t req_idx = 0; req_idx < request_count_; req_idx++) { - if (buffer_offset + sizeof(T) > buffer_byte_size) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "unexpected total byte size for batch input"); - } - - TRITONBACKEND_Input* input; - RETURN_IF_ERROR(TRITONBACKEND_RequestInput( - requests_[req_idx], source_input.c_str(), &input)); - const int64_t* shape; - uint32_t dims_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, &shape, &dims_count, - nullptr, nullptr)); - *(reinterpret_cast(buffer) + req_idx) = - GetElementCount(shape, dims_count); - buffer_offset += sizeof(T); - } - // Set the rest of the buffer to 0 - for (; buffer_offset + sizeof(T) <= buffer_byte_size; - buffer_offset += sizeof(T)) { - *reinterpret_cast(buffer + buffer_offset) = 0; - } - return nullptr; // success -} - -template -TRITONSERVER_Error* -BackendInputCollector::SetAccumulatedElementCount( - const std::string& source_input, char* buffer, - const size_t buffer_byte_size) -{ - size_t accumulated_element_count = 0; - size_t buffer_offset = 0; - for (size_t req_idx = 0; req_idx < request_count_; req_idx++) { - if (buffer_offset + sizeof(T) > buffer_byte_size) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "unexpected total byte size for batch input"); - } - - TRITONBACKEND_Input* input; - RETURN_IF_ERROR(TRITONBACKEND_RequestInput( - requests_[req_idx], source_input.c_str(), &input)); - const int64_t* shape; - uint32_t dims_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, &shape, &dims_count, - nullptr, nullptr)); - accumulated_element_count += GetElementCount(shape, dims_count); - *(reinterpret_cast(buffer) + req_idx) = accumulated_element_count; - buffer_offset += sizeof(T); - } - // Set the rest of the buffer to 'accumulated_element_count' - // (no increase in element count) - for (; buffer_offset + sizeof(T) <= buffer_byte_size; - buffer_offset += sizeof(T)) { - *reinterpret_cast(buffer + buffer_offset) = accumulated_element_count; - } - return nullptr; // success -} - -template -TRITONSERVER_Error* -BackendInputCollector::SetBatchItemShape( - const std::string& source_input, char* buffer, - const size_t buffer_byte_size) -{ - size_t buffer_offset = 0; - for (size_t req_idx = 0; req_idx < request_count_; req_idx++) { - TRITONBACKEND_Input* input; - 
RETURN_IF_ERROR(TRITONBACKEND_RequestInput( - requests_[req_idx], source_input.c_str(), &input)); - const int64_t* shape; - uint32_t dims_count; - RETURN_IF_ERROR(TRITONBACKEND_InputPropertiesForHostPolicy( - input, host_policy_cstr_, nullptr, nullptr, &shape, &dims_count, - nullptr, nullptr)); - // Assuming first dimension is batch size and ragged input is only set - // for batching enabled model. - size_t batch_1_size = sizeof(T) * (dims_count - 1); - if (buffer_offset + (size_t)shape[0] * batch_1_size > buffer_byte_size) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (GetRequestId(requests_[req_idx]) + - "unexpected total byte size for batch input") - .c_str()); - } - // The batch input tracks the shape without batch dimension for - // each batch item - for (size_t idx = 1; idx < dims_count; ++idx) { - // Need to set the element explicitly for type conversion - *(reinterpret_cast(buffer + buffer_offset) + (idx - 1)) = shape[idx]; - } - // memcpy the data repeatedly if the request has batch size > 1 - for (int64_t idx = 1; idx < shape[0]; ++idx) { - memcpy( - buffer + buffer_offset + idx * batch_1_size, buffer + buffer_offset, - batch_1_size); - } - buffer_offset += batch_1_size * (size_t)shape[0]; - } - return nullptr; // success -} - -bool -BackendInputCollector::FlushPendingCopyKernel( - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id) -{ - if (pending_copy_kernel_input_buffers_.size() == 0) { - return false; - } - - bool cuda_copy = false; - TRITONSERVER_Error* error = nullptr; - // Only try to launch kernel if buffer count is large enough for - // good GPU utilization - if (pending_copy_kernel_input_buffer_counts_ >= kernel_buffer_threshold_) { - error = LaunchCopyKernel( - tensor_buffer, tensor_buffer_byte_size, tensor_memory_type, - tensor_memory_type_id); - cuda_copy = (error == nullptr); - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("gather kernel launched with status: ") + - ((error == nullptr) ? "Success" : TRITONSERVER_ErrorMessage(error))) - .c_str()); - } - // If kernel can't be launched then just perform a direct copy. - if ((pending_copy_kernel_input_buffer_counts_ < kernel_buffer_threshold_) || - (error != nullptr)) { - size_t offset = 0; - for (auto& pr : pending_copy_kernel_input_buffers_) { - cuda_copy |= SetInputTensor( - "gather kernel fallback", pr, tensor_buffer, tensor_buffer_byte_size, - tensor_memory_type, tensor_memory_type_id, - pending_copy_kernel_buffer_offset_ + offset, - TRITONSERVER_MEMORY_CPU_PINNED, false, true); - offset += pr.memory_desc_.byte_size_; - } - } - TRITONSERVER_ErrorDelete(error); - - // Pending kernel copies are handled... 
- pending_copy_kernel_buffer_byte_size_ = 0; - pending_copy_kernel_buffer_offset_ = 0; - pending_copy_kernel_input_buffer_counts_ = 0; - pending_copy_kernel_input_buffers_.clear(); - - return cuda_copy; -} - -TRITONSERVER_Error* -BackendInputCollector::LaunchCopyKernel( - char* tensor_buffer, const size_t tensor_buffer_byte_size, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id) -{ -#ifdef TRITON_ENABLE_GPU - input_ptr_buffer_host_.emplace_back(new std::vector()); - byte_size_buffer_host_.emplace_back(new std::vector()); - byte_size_offset_buffer_host_.emplace_back(new std::vector()); - - auto& input_ptr_buffer_host = *input_ptr_buffer_host_.back(); - auto& byte_size_buffer_host = *byte_size_buffer_host_.back(); - auto& byte_size_offset_buffer_host = *byte_size_offset_buffer_host_.back(); - - input_ptr_buffer_host.reserve(pending_copy_kernel_input_buffer_counts_); - byte_size_buffer_host.reserve(pending_copy_kernel_input_buffer_counts_); - byte_size_offset_buffer_host.reserve( - pending_copy_kernel_input_buffer_counts_); - - size_t byte_size_offset = 0; - for (const auto& response_input : pending_copy_kernel_input_buffers_) { - const auto& input = response_input.memory_desc_; - input_ptr_buffer_host.emplace_back( - const_cast(reinterpret_cast(input.buffer_))); - byte_size_buffer_host.emplace_back(input.byte_size_); - byte_size_offset_buffer_host.emplace_back(byte_size_offset); - byte_size_offset += input.byte_size_; - } - - BackendMemory* backend_memory = nullptr; - std::vector alloc_types; - switch (tensor_memory_type) { - case TRITONSERVER_MEMORY_GPU: - alloc_types = {BackendMemory::AllocationType::GPU_POOL, - BackendMemory::AllocationType::GPU}; - break; - case TRITONSERVER_MEMORY_CPU_PINNED: - alloc_types = {BackendMemory::AllocationType::CPU_PINNED_POOL, - BackendMemory::AllocationType::CPU_PINNED}; - break; - case TRITONSERVER_MEMORY_CPU: - alloc_types = {BackendMemory::AllocationType::CPU}; - break; - } - - // input_ptr_buffer - size_t input_ptr_buffer_byte_size = - pending_copy_kernel_input_buffer_counts_ * sizeof(int8_t*); - auto err = BackendMemory::Create( - memory_manager_, alloc_types, tensor_memory_type_id, - input_ptr_buffer_byte_size, &backend_memory); - if (err != nullptr) { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("unable to create backend memory for type: ") + - TRITONSERVER_MemoryTypeString(tensor_memory_type) + - " id: " + std::to_string(tensor_memory_type_id) + ": " + - TRITONSERVER_ErrorMessage(err)) - .c_str()); - TRITONSERVER_ErrorDelete(err); - } else { - in_use_memories_.emplace_back(backend_memory); - } - if (backend_memory == nullptr || - (backend_memory->MemoryType() != tensor_memory_type) || - (backend_memory->MemoryTypeId() != tensor_memory_type_id)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "Failed to obtain memory buffer for copy kernel input"); - } - char* input_ptr_buffer = backend_memory->MemoryPtr(); - - // byte_size_buffer - size_t byte_size_buffer_byte_size = - pending_copy_kernel_input_buffer_counts_ * sizeof(size_t); - err = BackendMemory::Create( - memory_manager_, alloc_types, tensor_memory_type_id, - byte_size_buffer_byte_size, &backend_memory); - if (err != nullptr) { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("unable to create backend memory for type: ") + - TRITONSERVER_MemoryTypeString(tensor_memory_type) + - " id: " + std::to_string(tensor_memory_type_id) + ": " + - TRITONSERVER_ErrorMessage(err)) - .c_str()); - TRITONSERVER_ErrorDelete(err); - 
} else { - in_use_memories_.emplace_back(backend_memory); - } - if (backend_memory == nullptr || - (backend_memory->MemoryType() != tensor_memory_type) || - (backend_memory->MemoryTypeId() != tensor_memory_type_id)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "Failed to obtain memory buffer for copy kernel input"); - } - char* byte_size_buffer = backend_memory->MemoryPtr(); - - // byte_size_offset_buffer - size_t byte_size_offset_buffer_byte_size = - pending_copy_kernel_input_buffer_counts_ * sizeof(size_t); - err = BackendMemory::Create( - memory_manager_, alloc_types, tensor_memory_type_id, - byte_size_offset_buffer_byte_size, &backend_memory); - if (err != nullptr) { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("unable to create backend memory for type: ") + - TRITONSERVER_MemoryTypeString(tensor_memory_type) + - " id: " + std::to_string(tensor_memory_type_id) + ": " + - TRITONSERVER_ErrorMessage(err)) - .c_str()); - TRITONSERVER_ErrorDelete(err); - } else { - in_use_memories_.emplace_back(backend_memory); - } - if (backend_memory == nullptr || - (backend_memory->MemoryType() != tensor_memory_type) || - (backend_memory->MemoryTypeId() != tensor_memory_type_id)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "Failed to obtain memory buffer for copy kernel input"); - } - char* byte_size_offset_buffer = backend_memory->MemoryPtr(); - - cudaMemcpyAsync( - input_ptr_buffer, input_ptr_buffer_host.data(), - pending_copy_kernel_input_buffer_counts_ * sizeof(int8_t*), - cudaMemcpyDefault, stream_); - cudaMemcpyAsync( - byte_size_buffer, byte_size_buffer_host.data(), - pending_copy_kernel_input_buffer_counts_ * sizeof(size_t), - cudaMemcpyDefault, stream_); - cudaMemcpyAsync( - byte_size_offset_buffer, byte_size_offset_buffer_host.data(), - pending_copy_kernel_input_buffer_counts_ * sizeof(size_t), - cudaMemcpyDefault, stream_); - if (buffer_ready_event_ != nullptr) { - cudaEventSynchronize(buffer_ready_event_); - buffer_ready_event_ = nullptr; - } - RETURN_IF_CUDA_ERROR( - RunGatherKernel( - (const int8_t**)input_ptr_buffer, (const size_t*)byte_size_buffer, - (const size_t*)byte_size_offset_buffer, - (int8_t*)tensor_buffer + pending_copy_kernel_buffer_offset_, - pending_copy_kernel_input_buffer_counts_, stream_), - TRITONSERVER_ERROR_INTERNAL, - std::string("Failed to launch gather kernel")); - return nullptr; -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "Copy kernel can not be launched with TRITON_ENABLE_GPU=OFF"); -#endif // TRITON_ENABLE_GPU -} - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/src/backend_memory.cc b/3rdparty/backend-r22.12/src/backend_memory.cc deleted file mode 100644 index 9dd1594552de171d2d51a44a38b1467f1b30cb89..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/backend_memory.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/backend/backend_memory.h" - -#include -#include "triton/backend/backend_common.h" - -namespace triton { namespace backend { - -TRITONSERVER_Error* -BackendMemory::Create( - TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type, - const int64_t memory_type_id, const size_t byte_size, BackendMemory** mem) -{ - *mem = nullptr; - - void* ptr = nullptr; - switch (alloc_type) { - case AllocationType::CPU_PINNED: { -#ifdef TRITON_ENABLE_GPU - RETURN_IF_CUDA_ERROR( - cudaHostAlloc(&ptr, byte_size, cudaHostAllocPortable), - TRITONSERVER_ERROR_UNAVAILABLE, - std::string("failed to allocate pinned system memory")); -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "pinned-memory allocation not supported"); -#endif // TRITON_ENABLE_GPU - break; - } - - case AllocationType::GPU: { -#ifdef TRITON_ENABLE_GPU - int current_device; - RETURN_IF_CUDA_ERROR( - cudaGetDevice(¤t_device), TRITONSERVER_ERROR_INTERNAL, - std::string("failed to get device")); - bool overridden = (current_device != memory_type_id); - if (overridden) { - RETURN_IF_CUDA_ERROR( - cudaSetDevice(memory_type_id), TRITONSERVER_ERROR_INTERNAL, - std::string("failed to set device")); - } - - auto err = cudaMalloc(&ptr, byte_size); - - if (overridden) { - LOG_IF_CUDA_ERROR( - cudaSetDevice(current_device), "failed to set CUDA device"); - } - - RETURN_ERROR_IF_FALSE( - err == cudaSuccess, TRITONSERVER_ERROR_UNAVAILABLE, - std::string("failed to allocate GPU memory: ") + - cudaGetErrorString(err)); -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "GPU allocation not supported"); -#endif // TRITON_ENABLE_GPU - break; - } - - case AllocationType::CPU: - case AllocationType::CPU_PINNED_POOL: - case AllocationType::GPU_POOL: - RETURN_IF_ERROR(TRITONBACKEND_MemoryManagerAllocate( - manager, &ptr, AllocTypeToMemoryType(alloc_type), memory_type_id, - byte_size)); - break; - } - - *mem = new BackendMemory( - manager, alloc_type, memory_type_id, reinterpret_cast(ptr), - byte_size); - - return nullptr; // success -} - -TRITONSERVER_Error* -BackendMemory::Create( - TRITONBACKEND_MemoryManager* manager, - const std::vector& alloc_types, - const int64_t memory_type_id, const size_t byte_size, BackendMemory** mem) -{ - *mem = nullptr; - RETURN_ERROR_IF_TRUE( - alloc_types.size() == 0, TRITONSERVER_ERROR_INVALID_ARG, - std::string("BackendMemory::Create, at least one allocation type must be " - "specified")); - - bool success = false; - std::unordered_map errors; - for (const AllocationType alloc_type : 
alloc_types) { - TRITONSERVER_Error* err = - Create(manager, alloc_type, memory_type_id, byte_size, mem); - if (err == nullptr) { - success = true; - break; - } - - errors.insert({alloc_type, err}); - } - - // If allocation failed for all allocation types then display all - // the error messages and show the entire allocation request as - // failing. - if (!success) { - std::string msg = "BackendMemory::Create, all allocation types failed:"; - for (const auto& pr : errors) { - const AllocationType alloc_type = pr.first; - TRITONSERVER_Error* err = pr.second; - msg += std::string("\n\t") + AllocTypeString(alloc_type) + ": " + - TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - - return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_UNAVAILABLE, msg.c_str()); - } - - return nullptr; // success -} - -TRITONSERVER_Error* -BackendMemory::Create( - TRITONBACKEND_MemoryManager* manager, const AllocationType alloc_type, - const int64_t memory_type_id, void* buffer, const size_t byte_size, - BackendMemory** mem) -{ - *mem = new BackendMemory( - manager, alloc_type, memory_type_id, reinterpret_cast(buffer), - byte_size, false /* owns_buffer */); - - return nullptr; // success -} - -BackendMemory::~BackendMemory() -{ - if (owns_buffer_) { - switch (alloctype_) { - case AllocationType::CPU_PINNED: -#ifdef TRITON_ENABLE_GPU - if (buffer_ != nullptr) { - LOG_IF_CUDA_ERROR( - cudaFreeHost(buffer_), "failed to free pinned memory"); - } -#endif // TRITON_ENABLE_GPU - break; - - case AllocationType::GPU: -#ifdef TRITON_ENABLE_GPU - if (buffer_ != nullptr) { - LOG_IF_CUDA_ERROR(cudaFree(buffer_), "failed to free CUDA memory"); - } -#endif // TRITON_ENABLE_GPU - break; - - case AllocationType::CPU: - case AllocationType::CPU_PINNED_POOL: - case AllocationType::GPU_POOL: - LOG_IF_ERROR( - TRITONBACKEND_MemoryManagerFree( - manager_, buffer_, AllocTypeToMemoryType(alloctype_), - memtype_id_), - "failed to free memory buffer"); - break; - } - } -} - -TRITONSERVER_MemoryType -BackendMemory::AllocTypeToMemoryType(const AllocationType a) -{ - switch (a) { - case AllocationType::CPU: - return TRITONSERVER_MEMORY_CPU; - case AllocationType::CPU_PINNED: - case AllocationType::CPU_PINNED_POOL: - return TRITONSERVER_MEMORY_CPU_PINNED; - case AllocationType::GPU: - case AllocationType::GPU_POOL: - return TRITONSERVER_MEMORY_GPU; - } - - return TRITONSERVER_MEMORY_CPU; // unreachable -} - -const char* -BackendMemory::AllocTypeString(const AllocationType a) -{ - switch (a) { - case AllocationType::CPU: - return "CPU"; - case AllocationType::CPU_PINNED: - return "CPU_PINNED"; - case AllocationType::GPU: - return "GPU"; - case AllocationType::CPU_PINNED_POOL: - return "CPU_PINNED_POOL"; - case AllocationType::GPU_POOL: - return "GPU_POOL"; - } - - return ""; -} - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/src/backend_model.cc b/3rdparty/backend-r22.12/src/backend_model.cc deleted file mode 100644 index 1859580d3eb527c8cbd39e0418c1e0999508f67d..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/backend_model.cc +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/backend/backend_model.h" - -#include "triton/backend/backend_common.h" - -namespace triton { namespace backend { - -// -// BackendModel -// -BackendModel::BackendModel( - TRITONBACKEND_Model* triton_model, const bool allow_optional) - : triton_model_(triton_model), allow_optional_(allow_optional) -{ - const char* model_name; - THROW_IF_BACKEND_MODEL_ERROR( - TRITONBACKEND_ModelName(triton_model, &model_name)); - name_ = model_name; - - THROW_IF_BACKEND_MODEL_ERROR( - TRITONBACKEND_ModelVersion(triton_model, &version_)); - - const char* repository_path = nullptr; - TRITONBACKEND_ArtifactType repository_artifact_type; - THROW_IF_BACKEND_MODEL_ERROR(TRITONBACKEND_ModelRepository( - triton_model, &repository_artifact_type, &repository_path)); - if (repository_artifact_type != TRITONBACKEND_ARTIFACT_FILESYSTEM) { - throw BackendModelException(TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - (std::string("unsupported repository artifact type for model '") + - model_name + "'") - .c_str())); - } - repository_path_ = repository_path; - - THROW_IF_BACKEND_MODEL_ERROR( - TRITONBACKEND_ModelServer(triton_model, &triton_server_)); - TRITONBACKEND_Backend* backend; - THROW_IF_BACKEND_MODEL_ERROR( - TRITONBACKEND_ModelBackend(triton_model, &backend)); - THROW_IF_BACKEND_MODEL_ERROR( - TRITONBACKEND_BackendMemoryManager(backend, &triton_memory_manager_)); - - THROW_IF_BACKEND_MODEL_ERROR(ParseModelConfig()); -} - -TRITONSERVER_Error* -BackendModel::ParseModelConfig() -{ - TRITONSERVER_Message* config_message; - RETURN_IF_ERROR(TRITONBACKEND_ModelConfig( - triton_model_, 1 /* config_version */, &config_message)); - - // Get the model configuration as a json string from - // config_message. We use TritonJson, which is a wrapper that - // returns nice errors (currently the underlying implementation is - // rapidjson... but others could be added). 
- const char* buffer; - size_t byte_size; - RETURN_IF_ERROR( - TRITONSERVER_MessageSerializeToJson(config_message, &buffer, &byte_size)); - - TRITONSERVER_Error* err = model_config_.Parse(buffer, byte_size); - RETURN_IF_ERROR(TRITONSERVER_MessageDelete(config_message)); - RETURN_IF_ERROR(err); - - int64_t mbs = 0; - RETURN_IF_ERROR(model_config_.MemberAsInt("max_batch_size", &mbs)); - max_batch_size_ = mbs; - - enable_pinned_input_ = false; - enable_pinned_output_ = false; - { - common::TritonJson::Value optimization; - if (model_config_.Find("optimization", &optimization)) { - common::TritonJson::Value pinned_memory; - if (optimization.Find("input_pinned_memory", &pinned_memory)) { - RETURN_IF_ERROR( - pinned_memory.MemberAsBool("enable", &enable_pinned_input_)); - } - if (optimization.Find("output_pinned_memory", &pinned_memory)) { - RETURN_IF_ERROR( - pinned_memory.MemberAsBool("enable", &enable_pinned_output_)); - } - } - } - - RETURN_IF_ERROR( - BatchInput::ParseFromModelConfig(model_config_, &batch_inputs_)); - RETURN_IF_ERROR( - BatchOutput::ParseFromModelConfig(model_config_, &batch_outputs_)); - for (const auto& batch_output : batch_outputs_) { - for (const auto& name : batch_output.TargetNames()) { - batch_output_map_.emplace(name, &batch_output); - } - } - triton::common::TritonJson::Value config_inputs; - RETURN_IF_ERROR(model_config_.MemberAsArray("input", &config_inputs)); - for (size_t i = 0; i < config_inputs.ArraySize(); i++) { - triton::common::TritonJson::Value io; - RETURN_IF_ERROR(config_inputs.IndexAsObject(i, &io)); - std::string io_name; - RETURN_IF_ERROR(io.MemberAsString("name", &io_name)); - triton::common::TritonJson::Value input_property_json; - bool allow_ragged_batch = false; - if (io.Find("allow_ragged_batch", &input_property_json)) { - RETURN_IF_ERROR(input_property_json.AsBool(&allow_ragged_batch)); - } - if (allow_ragged_batch) { - ragged_inputs_.emplace(io_name); - } - bool optional = false; - if (io.Find("optional", &input_property_json)) { - RETURN_IF_ERROR(input_property_json.AsBool(&optional)); - } - if (optional) { - if (allow_optional_) { - optional_inputs_.emplace(io_name); - } else { - RETURN_IF_ERROR(TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("'optional' is set to true for input '") + io_name + - "' while the backend model doesn't support optional input") - .c_str())); - } - } - } - - return nullptr; -} - -TRITONSERVER_Error* -BackendModel::SetModelConfig() -{ - triton::common::TritonJson::WriteBuffer json_buffer; - RETURN_IF_ERROR(ModelConfig().Write(&json_buffer)); - - TRITONSERVER_Message* message; - RETURN_IF_ERROR(TRITONSERVER_MessageNewFromSerializedJson( - &message, json_buffer.Base(), json_buffer.Size())); - RETURN_IF_ERROR(TRITONBACKEND_ModelSetConfig( - triton_model_, 1 /* config_version */, message)); - RETURN_IF_ERROR(TRITONSERVER_MessageDelete(message)); - - // Triton core can normalize the missing config settings - // in the above call. We must retrieve the updated model - // configration from the core. - RETURN_IF_ERROR(ParseModelConfig()); - - return nullptr; -} - -TRITONSERVER_Error* -BackendModel::SupportsFirstDimBatching(bool* supports) -{ - *supports = max_batch_size_ > 0; - return nullptr; -} - -const BatchOutput* -BackendModel::FindBatchOutput(const std::string& output_name) const -{ - const auto it = batch_output_map_.find(output_name); - return ((it == batch_output_map_.end()) ? 
nullptr : it->second); -} - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/src/backend_model_instance.cc b/3rdparty/backend-r22.12/src/backend_model_instance.cc deleted file mode 100644 index ae7ff9d71f9d3cecbca73c91b8cfe524e4871ba5..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/backend_model_instance.cc +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/backend/backend_model_instance.h" - -#include -#include "triton/backend/backend_common.h" -#include "triton/backend/backend_model.h" - -namespace triton { namespace backend { - -// -// BackendModelInstance -// -BackendModelInstance::BackendModelInstance( - BackendModel* backend_model, - TRITONBACKEND_ModelInstance* triton_model_instance) - : backend_model_(backend_model), - triton_model_instance_(triton_model_instance) -{ - const char* instance_name; - THROW_IF_BACKEND_INSTANCE_ERROR( - TRITONBACKEND_ModelInstanceName(triton_model_instance, &instance_name)); - name_ = instance_name; - - THROW_IF_BACKEND_INSTANCE_ERROR( - TRITONBACKEND_ModelInstanceKind(triton_model_instance, &kind_)); - - THROW_IF_BACKEND_INSTANCE_ERROR( - TRITONBACKEND_ModelInstanceDeviceId(triton_model_instance, &device_id_)); - - common::TritonJson::Value& model_config = backend_model->ModelConfig(); - - // If the model configuration specifies a 'default_model_filename' - // and/or specifies 'cc_model_filenames' then determine the - // appropriate 'artifact_filename' value. If model configuration - // does not specify then just leave 'artifact_filename' empty and - // the backend can then provide its own logic for determine the - // filename if that is appropriate. 
- THROW_IF_BACKEND_INSTANCE_ERROR(model_config.MemberAsString( - "default_model_filename", &artifact_filename_)); - - switch (kind_) { - case TRITONSERVER_INSTANCEGROUPKIND_CPU: { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("Creating instance ") + name_ + - " on CPU using artifact '" + artifact_filename_ + "'") - .c_str()); - break; - } - case TRITONSERVER_INSTANCEGROUPKIND_MODEL: { - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("Creating instance ") + name_ + - " on model-specified devices using artifact '" + artifact_filename_ + - "'") - .c_str()); - break; - } - case TRITONSERVER_INSTANCEGROUPKIND_GPU: { -#if defined(TRITON_ENABLE_GPU) - cudaDeviceProp cuprops; - cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, device_id_); - if (cuerr != cudaSuccess) { - throw BackendModelInstanceException(TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("unable to get CUDA device properties for ") + name_ + - ": " + cudaGetErrorString(cuerr)) - .c_str())); - } - - const std::string cc = - std::to_string(cuprops.major) + "." + std::to_string(cuprops.minor); - common::TritonJson::Value cc_names; - common::TritonJson::Value cc_name; - if ((model_config.Find("cc_model_filenames", &cc_names)) && - (cc_names.Find(cc.c_str(), &cc_name))) { - cc_name.AsString(&artifact_filename_); - } - - LOG_MESSAGE( - TRITONSERVER_LOG_VERBOSE, - (std::string("Creating instance ") + name_ + " on GPU " + - std::to_string(device_id_) + " (" + cc + ") using artifact '" + - artifact_filename_ + "'") - .c_str()); -#elif !defined(TRITON_ENABLE_MALI_GPU) - throw BackendModelInstanceException(TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "GPU instances not supported")); -#endif // TRITON_ENABLE_GPU - break; - } - default: { - throw BackendModelInstanceException(TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("unexpected instance kind for ") + name_).c_str())); - } - } - - stream_ = nullptr; - if (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU) { - THROW_IF_BACKEND_INSTANCE_ERROR( - CreateCudaStream(device_id_, 0 /* cuda_stream_priority */, &stream_)); - } - - // Get the host policy setting as a json string from message, - // and extract the host policy name for the instance. 
- TRITONSERVER_Message* message = nullptr; - THROW_IF_BACKEND_MODEL_ERROR( - TRITONBACKEND_ModelInstanceHostPolicy(triton_model_instance_, &message)); - const char* buffer; - size_t byte_size; - THROW_IF_BACKEND_MODEL_ERROR( - TRITONSERVER_MessageSerializeToJson(message, &buffer, &byte_size)); - - common::TritonJson::Value host_policy; - TRITONSERVER_Error* err = host_policy.Parse(buffer, byte_size); - THROW_IF_BACKEND_MODEL_ERROR(err); - std::vector host_policy_name; - THROW_IF_BACKEND_MODEL_ERROR(host_policy.Members(&host_policy_name)); - if (host_policy_name.size() != 1) { - throw BackendModelInstanceException(TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("unexpected no host policy for ") + name_).c_str())); - } - host_policy_name_ = host_policy_name[0]; -} - - -BackendModelInstance::~BackendModelInstance() -{ -#ifdef TRITON_ENABLE_GPU - if (stream_ != nullptr) { - cudaError_t err = cudaStreamDestroy(stream_); - if (err != cudaSuccess) { - TRITONSERVER_LogMessage( - TRITONSERVER_LOG_ERROR, __FILE__, __LINE__, - (std::string("~BackendModelInstance: ") + name_ + - " failed to destroy cuda stream: " + cudaGetErrorString(err)) - .c_str()); - } - stream_ = nullptr; - } -#endif // TRITON_ENABLE_GPU -} - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/src/backend_output_responder.cc b/3rdparty/backend-r22.12/src/backend_output_responder.cc deleted file mode 100644 index 81acd2517d1080850a39296f38d3e3bc435a5b0d..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/backend_output_responder.cc +++ /dev/null @@ -1,607 +0,0 @@ -// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "triton/backend/backend_output_responder.h" - -#include "triton/backend/backend_common.h" -#include "triton/backend/backend_model.h" -#include "triton/backend/backend_model_instance.h" - -namespace triton { namespace backend { - -// -// BackendOutputResponder -// -BackendOutputResponder::~BackendOutputResponder() -{ - for (auto& pinned_memory : pinned_memories_) { - LOG_IF_ERROR( - TRITONBACKEND_MemoryManagerFree( - memory_manager_, reinterpret_cast(pinned_memory), - TRITONSERVER_MEMORY_CPU_PINNED, 0), - "failed to free pinned memory"); - } -} - -void -BackendOutputResponder::ProcessTensor( - const std::string& output_name, const TRITONSERVER_DataType datatype, - std::vector& batchn_shape, const char* buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id) -{ - // A value of CPU_PINNED indicates that pinned memory buffer is not - // needed for this tensor. Any other value indicates that a pinned - // memory buffer is needed when the target memory type matches - // 'use_pinned_memory_type'. - TRITONSERVER_MemoryType use_pinned_memory_type = - TRITONSERVER_MEMORY_CPU_PINNED; - if (pinned_enabled_) { - use_pinned_memory_type = GetUsePinnedMemoryType(memory_type); - } - - const int64_t batchn_batch_size = batchn_shape[0]; - int64_t batch_size_offset = 0; - - size_t tensor_offset = 0; - - for (size_t idx = 0; idx < responses_->size(); idx++) { - auto& request = requests_[idx]; - auto& response = (*responses_)[idx]; - - // If then pending copies are from tensor buffer that is not - // contiguous with 'response's part of that buffer, then need to - // go ahead and perform the pending copies so that can start a - // new contiguous region if necessary. - if ((pending_pinned_byte_size_ > 0) && - (tensor_offset != - (pending_pinned_byte_size_ + pending_pinned_offset_))) { - need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id); - } - - // Override shape to be correct for this response. 
- if (first_dim_batching_) { - TRITONBACKEND_Input* input; - TRITONBACKEND_RequestInputByIndex(request, 0, &input); - const int64_t* shape; - TRITONBACKEND_InputProperties( - input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr); - if ((batchn_batch_size != -1) && - ((batch_size_offset + shape[0]) > batchn_batch_size)) { - if (response != nullptr) { - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - std::string( - GetRequestId(request) + - "failed to split the output tensor '" + output_name + - "' in responses: expected batch size of atleast " + - std::to_string(batch_size_offset + shape[0]) + - " in model output, got " + - std::to_string(batchn_batch_size)) - .c_str())); - } - } - batchn_shape[0] = shape[0]; - batch_size_offset += shape[0]; - } - - const size_t tensor_byte_size = GetByteSize(datatype, batchn_shape); - - TRITONBACKEND_Output* response_output; - if (response != nullptr) { - uint32_t output_count; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_RequestOutputCount(request, &output_count)); - if (response != nullptr) { - for (uint32_t output_idx = 0; output_idx < output_count; output_idx++) { - const char* name; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONBACKEND_RequestOutputName(request, output_idx, &name)); - if ((response != nullptr) && (output_name == name)) { - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_ResponseOutput( - response, &response_output, name, datatype, - batchn_shape.data(), batchn_shape.size())); - if (response != nullptr) { - need_sync_ |= SetFixedSizeBuffer( - &response, response_output, output_name, tensor_byte_size, - tensor_offset, buffer, memory_type, memory_type_id, - use_pinned_memory_type, false /* state */); - } - - break; - } - } - } - } - - tensor_offset += tensor_byte_size; - } - - // Done with the tensor, flush any pending pinned copies. - need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id); -#ifdef TRITON_ENABLE_GPU - if (need_sync_ && (event_ != nullptr)) { - cudaEventRecord(event_, stream_); - } -#endif // TRITON_ENABLE_GPU -} - -std::vector -BackendOutputResponder::ProcessStateTensor( - const std::string& output_state_name, const TRITONSERVER_DataType datatype, - std::vector& batchn_shape, const char* buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id) -{ - // A value of CPU_PINNED indicates that pinned memory buffer is not - // needed for this tensor. Any other value indicates that a pinned - // memory buffer is needed when the target memory type matches - // 'use_pinned_memory_type'. - TRITONSERVER_MemoryType use_pinned_memory_type = - TRITONSERVER_MEMORY_CPU_PINNED; - if (pinned_enabled_) { - use_pinned_memory_type = GetUsePinnedMemoryType(memory_type); - } - - std::vector states; - - const int64_t batchn_batch_size = batchn_shape[0]; - int64_t batch_size_offset = 0; - - size_t tensor_offset = 0; - - for (size_t idx = 0; idx < responses_->size(); idx++) { - auto& request = requests_[idx]; - auto& response = (*responses_)[idx]; - - // If then pending copies are from tensor buffer that is not - // contiguous with 'response's part of that buffer, then need to - // go ahead and perform the pending copies so that can start a - // new contiguous region if necessary. 
- if ((pending_pinned_byte_size_ > 0) && - (tensor_offset != - (pending_pinned_byte_size_ + pending_pinned_offset_))) { - need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id); - } - - // Override shape to be correct for this response. - if (first_dim_batching_) { - TRITONBACKEND_Input* input; - TRITONBACKEND_RequestInputByIndex(request, 0, &input); - const int64_t* shape; - TRITONBACKEND_InputProperties( - input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr); - if ((batchn_batch_size != -1) && - ((batch_size_offset + shape[0]) > batchn_batch_size)) { - if (response != nullptr) { - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - std::string( - GetRequestId(request) + - "failed to split the output state tensor '" + - output_state_name + - "' in responses: expected batch size of atleast " + - std::to_string(batch_size_offset + shape[0]) + - " in model output, got " + - std::to_string(batchn_batch_size)) - .c_str())); - } - } - batchn_shape[0] = shape[0]; - batch_size_offset += shape[0]; - } - - const size_t tensor_byte_size = GetByteSize(datatype, batchn_shape); - - TRITONBACKEND_State* output_state; - if (response != nullptr) { - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_StateNew( - &output_state, request, output_state_name.c_str(), - datatype, batchn_shape.data(), batchn_shape.size())); - if (response != nullptr) { - states.push_back(output_state); - need_sync_ |= SetFixedSizeBuffer( - &response, output_state, output_state_name, tensor_byte_size, - tensor_offset, buffer, memory_type, memory_type_id, - use_pinned_memory_type, true /* state */); - } - } - - tensor_offset += tensor_byte_size; - } - - // Done with the tensor, flush any pending pinned copies. - need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id); -#ifdef TRITON_ENABLE_GPU - if (need_sync_ && (event_ != nullptr)) { - cudaEventRecord(event_, stream_); - } -#endif // TRITON_ENABLE_GPU - - return states; -} - -bool -BackendOutputResponder::Finalize() -{ -#ifdef TRITON_ENABLE_GPU - if ((!deferred_pinned_.empty()) && need_sync_) { - if (event_ != nullptr) { - cudaEventSynchronize(event_); - } else { - cudaStreamSynchronize(stream_); - } - need_sync_ = false; - } -#endif // TRITON_ENABLE_GPU - - // After the above sync all the GPU->pinned copies are complete. Any - // deferred copies of pinned->CPU can now be done. 
- for (auto& def : deferred_pinned_) { - auto pinned_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; - int64_t pinned_memory_id = 0; - char* pinned_buffer = def.pinned_memory_; - - size_t offset = 0; - for (auto& pr : def.responses_) { - auto& response = pr.first; - auto& response_output = pr.second; - - bool cuda_used = false; - RESPOND_AND_SET_NULL_IF_ERROR( - response, - CopyBuffer( - response_output.name_, pinned_memory_type, pinned_memory_id, - response_output.memory_type_, response_output.memory_type_id_, - response_output.buffer_byte_size_, pinned_buffer + offset, - const_cast(response_output.buffer_), stream_, &cuda_used, - copy_on_stream_)); - need_sync_ |= cuda_used; - - offset += response_output.buffer_byte_size_; - } - } - -#ifdef TRITON_ENABLE_GPU - // Record the new event location if deferred copies occur - if ((!deferred_pinned_.empty()) && need_sync_ && (event_ != nullptr)) { - cudaEventRecord(event_, stream_); - } -#endif // TRITON_ENABLE_GPU - deferred_pinned_.clear(); - - return need_sync_; -} - - -bool -BackendOutputResponder::SetFixedSizeBuffer( - TRITONBACKEND_Response** response, void* response_output_or_state, - const std::string& output_name, const size_t tensor_byte_size, - const size_t tensor_offset, const char* tensor_buffer, - const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id, - const TRITONSERVER_MemoryType use_pinned_memory_type, bool state) -{ - void* buffer = nullptr; - bool cuda_copy = false; - - TRITONSERVER_MemoryType actual_memory_type = tensor_memory_type; - int64_t actual_memory_type_id = tensor_memory_type_id; - - if (state) { - TRITONBACKEND_State* response_state = - reinterpret_cast(response_output_or_state); - auto err = TRITONBACKEND_StateBuffer( - response_state, &buffer, tensor_byte_size, &actual_memory_type, - &actual_memory_type_id); - if (err != nullptr) { - RESPOND_AND_SET_NULL_IF_ERROR(response, err); - return cuda_copy; - } - } else { - TRITONBACKEND_Output* response_output = - reinterpret_cast(response_output_or_state); - auto err = TRITONBACKEND_OutputBuffer( - response_output, &buffer, tensor_byte_size, &actual_memory_type, - &actual_memory_type_id); - if (err != nullptr) { - RESPOND_AND_SET_NULL_IF_ERROR(response, err); - return cuda_copy; - } - } - - // If the response buffer matches the memory type that should use an - // intermediate pinned memory buffer for the transfer, then just - // record the response as pending and increase the size required for - // the intermediate pinned buffer. - if ((use_pinned_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) && - (actual_memory_type == use_pinned_memory_type)) { - if (pending_pinned_byte_size_ == 0) { - pending_pinned_offset_ = tensor_offset; - } - - pending_pinned_byte_size_ += tensor_byte_size; - pending_pinned_outputs_.push_back(std::make_pair( - response, OutputData( - output_name, buffer, tensor_byte_size, actual_memory_type, - actual_memory_type_id))); - } else { - // Direct copy without intermediate pinned memory. 
- bool cuda_used = false; - auto err = CopyBuffer( - output_name, tensor_memory_type, tensor_memory_type_id, - actual_memory_type, actual_memory_type_id, tensor_byte_size, - tensor_buffer + tensor_offset, buffer, stream_, &cuda_used, - copy_on_stream_); - cuda_copy |= cuda_used; - - if (err != nullptr) { - RESPOND_AND_SET_NULL_IF_ERROR(response, err); - return cuda_copy; - } - } - - return cuda_copy; -} - -bool -BackendOutputResponder::FlushPendingPinned( - const char* tensor_buffer, const TRITONSERVER_MemoryType tensor_memory_type, - const int64_t tensor_memory_type_id) -{ - bool cuda_copy = false; - - // Will be copying from CPU->pinned->GPU or GPU->pinned->CPU - - // Attempt to allocate a pinned buffer to use for staging the - // copy... if we fail to allocated the pinned buffer then we just - // directly go CPU->GPU or GPU->CPU. - char* pinned_memory = nullptr; - if (pending_pinned_byte_size_ > 0) { - TRITONSERVER_Error* err = TRITONBACKEND_MemoryManagerAllocate( - memory_manager_, reinterpret_cast(&pinned_memory), - TRITONSERVER_MEMORY_CPU_PINNED, 0 /* memory_type_id */, - pending_pinned_byte_size_); - if (err != nullptr) { - pinned_memory = nullptr; - TRITONSERVER_ErrorDelete(err); - } - } - - // If the pinned buffer wasn't actually allocated then just perform - // a direct copy. - if (pinned_memory == nullptr) { - size_t offset = 0; - for (auto& pr : pending_pinned_outputs_) { - auto& response = pr.first; - auto& response_output = pr.second; - - bool cuda_used = false; - RESPOND_AND_SET_NULL_IF_ERROR( - response, - CopyBuffer( - response_output.name_, tensor_memory_type, tensor_memory_type_id, - response_output.memory_type_, response_output.memory_type_id_, - response_output.buffer_byte_size_, - tensor_buffer + pending_pinned_offset_ + offset, - const_cast(response_output.buffer_), stream_, &cuda_used, - copy_on_stream_)); - cuda_copy |= cuda_used; - - offset += response_output.buffer_byte_size_; - } - } - // We have a pinned buffer so do a single copy of a block of tensor - // data to the pinned buffer. - else { // pinned_memory_type == TRITONSERVER_MEMORY_CPU_PINNED - bool cuda_used = false; - auto err = CopyBuffer( - "pinned buffer", tensor_memory_type, tensor_memory_type_id, - TRITONSERVER_MEMORY_CPU_PINNED, 0 /* memory_type_id */, - pending_pinned_byte_size_, tensor_buffer + pending_pinned_offset_, - pinned_memory, stream_, &cuda_used, copy_on_stream_); - cuda_copy |= cuda_used; - - // If something goes wrong with the copy all the pending - // responses fail... - if (err != nullptr) { - for (auto& pr : pending_pinned_outputs_) { - auto& response = pr.first; - if (*response != nullptr) { - LOG_IF_ERROR( - TRITONBACKEND_ResponseSend( - *response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, err), - "failed to send TensorFlow error response"); - *response = nullptr; - } - } - TRITONSERVER_ErrorDelete(err); - } - - // If the copy was not async (i.e. if tensor was in CPU so a - // CPU->CPU-PINNED copy was performed above), then the pinned - // buffer now holds the tensor contents and we can immediately - // issue the copies from the pinned buffer to the - // responses. - // - // Otherwise the GPU->CPU-PINNED async copies are in flight and we - // simply remember the pinned buffer and the corresponding - // response outputs so that we can do the pinned->CPU copies in - // finalize after we have waited for all async copies to complete. 
- if (!cuda_used) { - size_t offset = 0; - for (auto& pr : pending_pinned_outputs_) { - auto& response = pr.first; - auto& response_output = pr.second; - - bool cuda_used = false; - RESPOND_AND_SET_NULL_IF_ERROR( - response, - CopyBuffer( - response_output.name_, TRITONSERVER_MEMORY_CPU_PINNED, - 0 /* memory_type_id */, response_output.memory_type_, - response_output.memory_type_id_, - response_output.buffer_byte_size_, pinned_memory + offset, - const_cast(response_output.buffer_), stream_, &cuda_used, - copy_on_stream_)); - cuda_copy |= cuda_used; - - offset += response_output.buffer_byte_size_; - } - } else { - deferred_pinned_.emplace_back( - pinned_memory, pending_pinned_byte_size_, - std::move(pending_pinned_outputs_)); - } - } - - // Pending pinned copies are handled... - pending_pinned_byte_size_ = 0; - pending_pinned_offset_ = 0; - pending_pinned_outputs_.clear(); - - // Need to hold on to the allocated pinned buffer as there are still - // copies in flight. Will delete it in finalize. - if (pinned_memory != nullptr) { - pinned_memories_.push_back(pinned_memory); - } - - return cuda_copy; -} - -void -BackendOutputResponder::ProcessBatchOutput( - const std::string& name, const BatchOutput& batch_output, - const char* buffer, const TRITONSERVER_MemoryType memory_type, - const int64_t memory_type_id) -{ - // A value of CPU_PINNED indicates that pinned memory buffer is not - // needed for this tensor. Any other value indicates that a pinned - // memory buffer is needed when the target memory type matches - // 'use_pinned_memory_type'. - TRITONSERVER_MemoryType use_pinned_memory_type = - TRITONSERVER_MEMORY_CPU_PINNED; - if (pinned_enabled_) { - use_pinned_memory_type = GetUsePinnedMemoryType(memory_type); - } - - // Batch output may be processed differently based on the kind - switch (batch_output.BatchOutputKind()) { - case BatchOutput::Kind::BATCH_SCATTER_WITH_INPUT_SHAPE: { - const auto& output_name = batch_output.TargetNames()[0]; - const auto& input_name = batch_output.SourceInputs()[0]; - const auto& datatype = batch_output.DataType(); - size_t tensor_offset = 0; - - for (size_t idx = 0; idx < responses_->size(); idx++) { - auto& request = requests_[idx]; - auto& response = (*responses_)[idx]; - - // If then pending copies are from tensor buffer that is not - // contiguous with 'response's part of that buffer, then need to - // go ahead and perform the pending copies so that can start a - // new contiguous region if necessary. 
- if ((pending_pinned_byte_size_ > 0) && - (tensor_offset != - (pending_pinned_byte_size_ + pending_pinned_offset_))) { - need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id); - } - - // Override shape to be correct for this response, with a naive - // assumption that the dynamic dimension in output is mapped to the same - // dimension in the input - auto output_batchn_shape = batch_output.OutputShape(); - { - TRITONBACKEND_Input* input; - TRITONBACKEND_RequestInput(request, input_name.c_str(), &input); - const int64_t* shape; - TRITONBACKEND_InputProperties( - input, nullptr, nullptr, &shape, nullptr, nullptr, nullptr); - for (size_t dim_idx = 0; dim_idx < output_batchn_shape.size(); - dim_idx++) { - if (output_batchn_shape[dim_idx] == -1) { - output_batchn_shape[dim_idx] = shape[dim_idx]; - } - } - } - - const size_t tensor_byte_size = - GetByteSize(datatype, output_batchn_shape); - - TRITONBACKEND_Output* response_output; - if (response != nullptr) { - uint32_t output_count; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONBACKEND_RequestOutputCount(request, &output_count)); - if (response != nullptr) { - for (uint32_t output_idx = 0; output_idx < output_count; - output_idx++) { - const char* name; - RESPOND_AND_SET_NULL_IF_ERROR( - &response, - TRITONBACKEND_RequestOutputName(request, output_idx, &name)); - if ((response != nullptr) && (output_name == name)) { - RESPOND_AND_SET_NULL_IF_ERROR( - &response, TRITONBACKEND_ResponseOutput( - response, &response_output, name, datatype, - output_batchn_shape.data(), - output_batchn_shape.size())); - if (response != nullptr) { - need_sync_ |= SetFixedSizeBuffer( - &response, response_output, output_name, tensor_byte_size, - tensor_offset, buffer, memory_type, memory_type_id, - use_pinned_memory_type, false /* state */); - } - - break; - } - } - } - } - - tensor_offset += tensor_byte_size; - } - break; - } - } - - // Done with the tensor, flush any pending pinned copies. - need_sync_ |= FlushPendingPinned(buffer, memory_type, memory_type_id); -#ifdef TRITON_ENABLE_GPU - if (need_sync_ && (event_ != nullptr)) { - cudaEventRecord(event_, stream_); - } -#endif // TRITON_ENABLE_GPU -} - -}} // namespace triton::backend diff --git a/3rdparty/backend-r22.12/src/kernel.cu b/3rdparty/backend-r22.12/src/kernel.cu deleted file mode 100644 index 9f24dd0bdd0bf46b3573c2ad52b9bcb873dc0b6a..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/kernel.cu +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "kernel.h" - -#include - -#define THREADBLOCK_SIZE 512 -__launch_bounds__(THREADBLOCK_SIZE) __global__ void TritonGatherKernel( - const int8_t** __restrict input_ptr_buffer, - const size_t* __restrict byte_size_buffer, - const size_t* __restrict byte_size_offset_buffer, - int8_t* __restrict output_buffer) -{ - int request_idx = blockIdx.x; - int lane_id = threadIdx.x; - const int8_t* request_input_buffer = input_ptr_buffer[request_idx]; - int byte_size = byte_size_buffer[request_idx]; - int byte_size_offset = byte_size_offset_buffer[request_idx]; - - int8_t* output_buffer_with_offset = output_buffer + byte_size_offset; - if (((byte_size % 4) == 0) && (((uint64_t)request_input_buffer % 4) == 0) && - (((uint64_t)output_buffer_with_offset % 4) == 0)) { - int32_t* input_4 = (int32_t*)request_input_buffer; - int32_t* output_4 = (int32_t*)output_buffer_with_offset; - int element_count = byte_size / 4; - for (int elem_id = lane_id; elem_id < element_count; - elem_id += THREADBLOCK_SIZE) { - output_4[elem_id] = input_4[elem_id]; - } - } else { - for (int elem_id = lane_id; elem_id < byte_size; - elem_id += THREADBLOCK_SIZE) { - output_buffer_with_offset[elem_id] = - __ldg(request_input_buffer + elem_id); - } - } -} - -#ifdef __cplusplus -extern "C" { -#endif - -cudaError_t -RunGatherKernel( - const int8_t** input_ptr_buffer, const size_t* byte_size_buffer, - const size_t* byte_size_offset_buffer, int8_t* output_buffer, - size_t request_count, cudaStream_t stream) -{ - TritonGatherKernel<<>>( - input_ptr_buffer, byte_size_buffer, byte_size_offset_buffer, - output_buffer); - return cudaGetLastError(); -} - -#ifdef __cplusplus -} -#endif diff --git a/3rdparty/backend-r22.12/src/kernel.h b/3rdparty/backend-r22.12/src/kernel.h deleted file mode 100644 index 948d3051e472d53c3081cffbebe450a7e415f430..0000000000000000000000000000000000000000 --- a/3rdparty/backend-r22.12/src/kernel.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -cudaError_t RunGatherKernel( - const int8_t** input_ptr_buffer, const size_t* byte_size_buffer, - const size_t* byte_size_offset_buffer, int8_t* output_buffer, - size_t request_count, cudaStream_t stream); - -#ifdef __cplusplus -} -#endif diff --git a/3rdparty/common-r22.12/.clang-format b/3rdparty/common-r22.12/.clang-format deleted file mode 100644 index 98c649734c29e0b1d134dae65be9bc08a14b4ba5..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/.clang-format +++ /dev/null @@ -1,37 +0,0 @@ ---- -BasedOnStyle: Google - -IndentWidth: 2 -ContinuationIndentWidth: 4 -UseTab: Never -MaxEmptyLinesToKeep: 2 - -SortIncludes: true -CompactNamespaces: true -ReflowComments: true - -DerivePointerAlignment: false -PointerAlignment: Left - -AllowShortIfStatementsOnASingleLine: false -AllowShortBlocksOnASingleLine: false -AllowShortFunctionsOnASingleLine: Inline - -AlwaysBreakAfterReturnType: TopLevelDefinitions -AlignAfterOpenBracket: AlwaysBreak -BreakBeforeBraces: Custom -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterNamespace: false - AfterStruct: false - AfterUnion: false - BeforeCatch: true - -BinPackArguments: true -BinPackParameters: true -ConstructorInitializerAllOnOneLineOrOnePerLine: false - -IndentCaseLabels: true \ No newline at end of file diff --git a/3rdparty/common-r22.12/.gitignore b/3rdparty/common-r22.12/.gitignore deleted file mode 100644 index 0e9f099a2eef4742716637e3cce3a45f7053b021..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/build -/.vscode -*.so diff --git a/3rdparty/common-r22.12/CMakeLists.txt b/3rdparty/common-r22.12/CMakeLists.txt deleted file mode 100644 index a9c9f4c238a81374d0f3831197119815f86bfe22..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/CMakeLists.txt +++ /dev/null @@ -1,431 +0,0 @@ -# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#cmake_minimum_required(VERSION 3.17) -cmake_minimum_required(VERSION 3.16) -project(tritoncommon LANGUAGES C CXX) - -# -# Options -# -# Some components are expensive to build and have extensive -# dependencies, so those parts of the build must be enabled -# explicitly. -option(TRITON_COMMON_ENABLE_PROTOBUF "Build protobuf artifacts" OFF) -option(TRITON_COMMON_ENABLE_PROTOBUF_PYTHON "Build protobuf artifacts for python" ON) -option(TRITON_COMMON_ENABLE_GRPC "Build grpc artifacts" OFF) -option(TRITON_COMMON_ENABLE_JSON "Build json-related libs" ON) -#option(TRITON_COMMON_ENABLE_JSON "Build json-related libs" OFF) - -if(TRITON_COMMON_ENABLE_JSON) - find_package(RapidJSON CONFIG REQUIRED) - message(STATUS "RapidJSON found. Headers: ${RAPIDJSON_INCLUDE_DIRS}") -endif() - -set(THREADS_PREFER_PTHREAD_FLAG TRUE) -find_package(Threads REQUIRED) - -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - message("Using MSVC as compiler, default target on Windows 10. " - "If the target system is not Windows 10, please update _WIN32_WINNT " - "to corresponding value.") -endif() - -add_library(common-compile-settings INTERFACE) - -target_compile_features(common-compile-settings INTERFACE cxx_std_11) - -target_compile_options(common-compile-settings INTERFACE - $<$,$,$>: - -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror> - $<$:/W0 /D_WIN32_WINNT=0x0A00 /EHsc> -) - -# -# Error -# -add_library( - triton-common-error - src/error.cc - ) - -add_library( - TritonCommon::triton-common-error ALIAS triton-common-error -) - -target_include_directories( - triton-common-error - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -target_link_libraries(triton-common-error PRIVATE common-compile-settings) - -# -# Logging -# -add_library( - triton-common-logging - src/logging.cc -) - -add_library( - TritonCommon::triton-common-logging ALIAS triton-common-logging -) - -target_include_directories( - triton-common-logging - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -if(${TRITON_ENABLE_LOGGING}) - target_compile_definitions( - triton-common-logging - PRIVATE TRITON_ENABLE_LOGGING=1 - ) -endif() # TRITON_ENABLE_LOGGING - -target_link_libraries(triton-common-logging PRIVATE common-compile-settings) - -# -# SyncQueue -# -add_library( - triton-common-sync-queue INTERFACE -) - -add_library( - TritonCommon::triton-common-sync-queue ALIAS triton-common-sync-queue -) - -target_include_directories( - triton-common-sync-queue - INTERFACE - $ - $ -) - -# -# Async Work Queue -# -add_library( - triton-common-async-work-queue - src/async_work_queue.cc - src/error.cc - src/thread_pool.cc -) - -add_library( - TritonCommon::triton-common-async-work-queue ALIAS triton-common-async-work-queue -) - -target_include_directories( - 
triton-common-async-work-queue - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -target_link_libraries(triton-common-async-work-queue - PUBLIC - Threads::Threads - PRIVATE - common-compile-settings -) - -# -# Thread Pool -# -add_library( - triton-common-thread-pool - src/thread_pool.cc -) - -add_library( - TritonCommon::triton-common-thread-pool ALIAS triton-common-thread-pool -) - -target_include_directories( - triton-common-thread-pool - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -target_link_libraries(triton-common-thread-pool - PUBLIC - Threads::Threads - PRIVATE - common-compile-settings -) - -# -# JSON utilities -# -if(TRITON_COMMON_ENABLE_JSON) - add_library( - triton-common-json INTERFACE - ) - - add_library( - TritonCommon::triton-common-json ALIAS triton-common-json - ) - - target_include_directories( - triton-common-json - INTERFACE - $ - $ - $ - $ - ) -endif() - -# -# Table Printer -# -add_library( - triton-common-table-printer - src/table_printer.cc -) - -add_library( - TritonBackend::triton-common-table-printer ALIAS triton-common-table-printer -) - -target_include_directories( - triton-common-table-printer - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src -) - -target_link_libraries(triton-common-table-printer PRIVATE common-compile-settings) - -set_target_properties( - triton-common-async-work-queue - triton-common-error - triton-common-logging - triton-common-table-printer - triton-common-thread-pool - PROPERTIES - WINDOWS_EXPORT_ALL_SYMBOLS TRUE - POSITION_INDEPENDENT_CODE ON -) - -set_target_properties( - triton-common-async-work-queue - PROPERTIES - OUTPUT_NAME tritonasyncworkqueue -) - -set_target_properties( - triton-common-thread-pool - PROPERTIES - OUTPUT_NAME tritonthreadpool -) - -set_target_properties( - triton-common-error - PROPERTIES - OUTPUT_NAME tritoncommonerror -) - -set_target_properties( - triton-common-logging - PROPERTIES - OUTPUT_NAME tritoncommonlogging -) - -set_target_properties( - triton-common-table-printer - PROPERTIES - OUTPUT_NAME tritontableprinter -) - -# -# Protobuf and GRPC artifacts -# -if(${TRITON_COMMON_ENABLE_PROTOBUF} OR ${TRITON_COMMON_ENABLE_GRPC}) - add_subdirectory(protobuf) - - set(protobuf_MODULE_COMPATIBLE TRUE CACHE BOOL "protobuf_MODULE_COMPATIBLE" FORCE) - find_package(Protobuf CONFIG REQUIRED) - message(STATUS "Using protobuf ${Protobuf_VERSION}") - - # - # Model Config (depends on protobuf & generated .pb.h file) - # - add_library( - triton-common-model-config - src/model_config.cc - ) - - add_library( - TritonCommon::triton-common-model-config ALIAS triton-common-model-config - ) - - target_include_directories( - triton-common-model-config - PUBLIC - $ - $ - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/src - ${Protobuf_INCLUDE_DIRS} - ) - - target_link_libraries( - triton-common-model-config - PRIVATE - common-compile-settings - protobuf::libprotobuf - proto-library - ) - - set_target_properties( - triton-common-model-config - PROPERTIES - WINDOWS_EXPORT_ALL_SYMBOLS TRUE - POSITION_INDEPENDENT_CODE ON - OUTPUT_NAME tritoncommonmodelconfig - ) - -endif() - -# -# Install -# -include(GNUInstallDirs) -set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonCommon) - -install( - TARGETS - triton-common-async-work-queue - triton-common-error - triton-common-logging - triton-common-sync-queue - triton-common-table-printer - triton-common-thread-pool - common-compile-settings - EXPORT - triton-common-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION 
${CMAKE_INSTALL_LIBDIR} -) - -if(TRITON_COMMON_ENABLE_JSON) - install( - TARGETS - triton-common-json - EXPORT - triton-common-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) -endif() - -if(${TRITON_COMMON_ENABLE_GRPC} OR ${TRITON_COMMON_ENABLE_PROTOBUF}) - install( - TARGETS - proto-library - triton-common-model-config -# proto-py-library - EXPORT - triton-common-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) -endif() - -if(${TRITON_COMMON_ENABLE_GRPC}) - install( - TARGETS - grpc-service-library -# grpc-service-py-library - EXPORT - triton-common-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) -endif() - -install( - DIRECTORY include/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -install( - EXPORT - triton-common-targets - FILE - TritonCommonTargets.cmake - NAMESPACE - TritonCommon:: - DESTINATION - ${INSTALL_CONFIGDIR} -) - -include(CMakePackageConfigHelpers) -configure_package_config_file( - ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonCommonConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/TritonCommonConfig.cmake - INSTALL_DESTINATION ${INSTALL_CONFIGDIR} -) - -install( - FILES - ${CMAKE_CURRENT_BINARY_DIR}/TritonCommonConfig.cmake - DESTINATION - ${INSTALL_CONFIGDIR} -) - -# -# Export from build tree -# -export( - EXPORT - triton-common-targets - FILE - ${CMAKE_CURRENT_BINARY_DIR}/TritonCommonTargets.cmake - NAMESPACE - TritonCommon:: -) - -export(PACKAGE TritonCommon) diff --git a/3rdparty/common-r22.12/LICENSE b/3rdparty/common-r22.12/LICENSE deleted file mode 100644 index a6bd4f2f5b3ecd75917ae39f06e6b5521c414491..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of NVIDIA CORPORATION nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/3rdparty/common-r22.12/README.md b/3rdparty/common-r22.12/README.md deleted file mode 100644 index 96e9e7bc68b80f9fec1e27b8da3c935fe9b6344b..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/README.md +++ /dev/null @@ -1,51 +0,0 @@ - - -[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) - -# Triton Inference Server Common - -Common source, scripts and utilities shared across all Triton -repositories. - -This repo is not typically built directly but is instead included in -the build of other repos. To build directly first install the required -dependencies. - -``` -$ apt-get install rapidjson-dev -``` - -Use cmake 3.17 or later to build and install in a local directory. - -``` -$ mkdir build -$ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install .. -$ make install -``` diff --git a/3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in b/3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in deleted file mode 100644 index 56cb1461c00366eb321bc4696eac157ee4f43bf9..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/cmake/TritonCommonConfig.cmake.in +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -@PACKAGE_INIT@ - -set_and_check(TRITONCOMMON_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}") - -list(APPEND CMAKE_MODULE_PATH ${TRITONCOMMON_CMAKE_DIR}) - -include(CMakeFindDependencyMacro) -find_dependency(Threads) - -if(NOT TARGET TritonCommon::triton-common-json) - include("${TRITONCOMMON_CMAKE_DIR}/TritonCommonTargets.cmake") -endif() - -check_required_components(triton-common-json - triton-common-sync-queue - triton-common-async-work-queue - triton-common-thread-pool -) - -set(TRITONCOMMON_LIBRARIES - TritonCommon::triton-common-json - TritonCommon::triton-common-sync-queue - TritonCommon::triton-common-async-work-queue - TritonCommon::triton-common-thread-pool -) diff --git a/3rdparty/common-r22.12/include/triton/common/async_work_queue.h b/3rdparty/common-r22.12/include/triton/common/async_work_queue.h deleted file mode 100644 index 40afe7bb1af573429eaf0285deab77e5caa0a74a..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/async_work_queue.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "error.h" -#include "thread_pool.h" - -namespace triton { namespace common { -// Manager for asynchronous worker threads. Use to accelerate copies and -// other such operations by running them in parallel. -// Call Initialize to start the worker threads (once) and AddTask to tasks to -// the queue. - -class AsyncWorkQueue { - public: - // Start 'worker_count' number of worker threads. - static Error Initialize(size_t worker_count); - - // Get the number of worker threads. - static size_t WorkerCount(); - - // Add a 'task' to the queue. The function will take ownership of 'task'. - // Therefore std::move should be used when calling AddTask. 
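// Illustrative usage sketch for the AsyncWorkQueue described above (the
// worker count and the task body are placeholders, and the elided template
// argument of std::function is assumed to be void(void)).
#include <functional>
#include "triton/common/async_work_queue.h"

void WarmUpCopyWorkers()
{
  namespace tc = triton::common;

  // Start the shared worker threads once, early in process startup.
  tc::Error err = tc::AsyncWorkQueue::Initialize(4 /* worker_count */);
  if (!err.IsOk()) {
    return;
  }

  // Hand a task to the queue; ownership is transferred via std::move.
  std::function<void(void)> task = [] { /* e.g. perform a buffer copy */ };
  tc::AsyncWorkQueue::AddTask(std::move(task));
}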
- static Error AddTask(std::function&& task); - - protected: - static void Reset(); - - private: - AsyncWorkQueue() = default; - ~AsyncWorkQueue(); - static AsyncWorkQueue* GetSingleton(); - std::unique_ptr thread_pool_; -}; - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/error.h b/3rdparty/common-r22.12/include/triton/common/error.h deleted file mode 100644 index cf8d30896ddcebd884f9d5b6f82ecbee31c95319..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/error.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include - -namespace triton { namespace common { - -// -// Error -// -// Error returned by utilities from common repo. -// -class Error { - public: - enum class Code { - SUCCESS, - UNKNOWN, - INTERNAL, - NOT_FOUND, - INVALID_ARG, - UNAVAILABLE, - UNSUPPORTED, - ALREADY_EXISTS - }; - - explicit Error(Code code = Code::SUCCESS) : code_(code) {} - explicit Error(Code code, const std::string& msg) : code_(code), msg_(msg) {} - - // Convenience "success" value. Can be used as Error::Success to - // indicate no error. - static const Error Success; - - // Return the code for this status. - Code ErrorCode() const { return code_; } - - // Return the message for this status. - const std::string& Message() const { return msg_; } - - // Return true if this status indicates "ok"/"success", false if - // status indicates some kind of failure. - bool IsOk() const { return code_ == Code::SUCCESS; } - - // Return the status as a string. - std::string AsString() const; - - // Return the constant string name for a code. 
- static const char* CodeString(const Code code); - - protected: - Code code_; - std::string msg_; -}; - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/logging.h b/3rdparty/common-r22.12/include/triton/common/logging.h deleted file mode 100644 index a52c0c1918558c5e75e0d90ad8ea051d3a23bf93..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/logging.h +++ /dev/null @@ -1,229 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace triton { namespace common { - -// A log message. -class LogMessage { - public: - // Log levels. - enum Level { kERROR = 0, kWARNING = 1, kINFO = 2 }; - - LogMessage(const char* file, int line, uint32_t level); - ~LogMessage(); - - std::stringstream& stream() { return stream_; } - - private: - static const std::vector level_name_; - std::stringstream stream_; -}; - -// Global logger for messages. Controls how log messages are reported. -class Logger { - public: - enum class Format { kDEFAULT, kISO8601 }; - - Logger(); - - // Is a log level enabled. - bool IsEnabled(LogMessage::Level level) const { return enables_[level]; } - - // Set enable for a log Level. - void SetEnabled(LogMessage::Level level, bool enable) - { - enables_[level] = enable; - } - - // Get the current verbose logging level. - uint32_t VerboseLevel() const { return vlevel_; } - - // Set the current verbose logging level. - void SetVerboseLevel(uint32_t vlevel) { vlevel_ = vlevel; } - - // Get the logging format. - Format LogFormat() { return format_; } - - // Get the logging format as a string. - std::string LogFormatString() - { - switch (format_) { - case Format::kISO8601: - return "ISO8601"; - case Format::kDEFAULT: - return "default"; - default: - return "Invalid format"; - } - } - - // Set the logging format. - void SetLogFormat(Format format) { format_ = format; } - - // Get the log output file name. 
- const std::string& LogFile() { return filename_; } - - // Set the log output file. Returns an empty string upon - // success, else returns an error string. - const std::string SetLogFile(const std::string& filename) - { - const std::lock_guard lock(mutex_); - file_stream_.close(); - std::string revert_name(filename_); - filename_ = filename; - if (!filename_.empty()) { - file_stream_.open(filename_, std::ios::app); - if (file_stream_.fail()) { - std::stringstream error; - error << __FILE__ << " " << __LINE__ - << ": Failed to open log file: " << std::strerror(errno) - << std::endl; - filename_ = revert_name; - file_stream_.open(filename_, std::ios::app); - return error.str(); - } - } - // will return an empty string - return std::string(); - } - - // Log a message. - void Log(const std::string& msg); - - // Flush the log. - void Flush(); - - private: - std::vector enables_; - uint32_t vlevel_; - Format format_; - std::mutex mutex_; - std::string filename_; - std::ofstream file_stream_; -}; - -extern Logger gLogger_; - -#define LOG_ENABLE_INFO(E) \ - triton::common::gLogger_.SetEnabled( \ - triton::common::LogMessage::Level::kINFO, (E)) -#define LOG_ENABLE_WARNING(E) \ - triton::common::gLogger_.SetEnabled( \ - triton::common::LogMessage::Level::kWARNING, (E)) -#define LOG_ENABLE_ERROR(E) \ - triton::common::gLogger_.SetEnabled( \ - triton::common::LogMessage::Level::kERROR, (E)) -#define LOG_SET_VERBOSE(L) \ - triton::common::gLogger_.SetVerboseLevel( \ - static_cast(std::max(0, (L)))) -#define LOG_SET_OUT_FILE(FN) triton::common::gLogger_.SetLogFile((FN)) -#define LOG_SET_FORMAT(F) triton::common::gLogger_.SetLogFormat((F)) - -#define LOG_VERBOSE_LEVEL triton::common::gLogger_.VerboseLevel() -#define LOG_FORMAT triton::common::gLogger_.LogFormat() -#define LOG_FORMAT_STRING triton::common::gLogger_.LogFormatString() -#define LOG_FILE triton::common::gLogger_.LogFile() - -#ifdef TRITON_ENABLE_LOGGING - -#define LOG_INFO_IS_ON \ - triton::common::gLogger_.IsEnabled(triton::common::LogMessage::Level::kINFO) -#define LOG_WARNING_IS_ON \ - triton::common::gLogger_.IsEnabled( \ - triton::common::LogMessage::Level::kWARNING) -#define LOG_ERROR_IS_ON \ - triton::common::gLogger_.IsEnabled(triton::common::LogMessage::Level::kERROR) -#define LOG_VERBOSE_IS_ON(L) (triton::common::gLogger_.VerboseLevel() >= (L)) - -#else - -// If logging is disabled, define macro to be false to avoid further evaluation -#define LOG_INFO_IS_ON false -#define LOG_WARNING_IS_ON false -#define LOG_ERROR_IS_ON false -#define LOG_VERBOSE_IS_ON(L) false - -#endif // TRITON_ENABLE_LOGGING - -// Macros that use explicitly given filename and line number. -#define LOG_INFO_FL(FN, LN) \ - if (LOG_INFO_IS_ON) \ - triton::common::LogMessage( \ - (char*)(FN), LN, triton::common::LogMessage::Level::kINFO) \ - .stream() -#define LOG_WARNING_FL(FN, LN) \ - if (LOG_WARNING_IS_ON) \ - triton::common::LogMessage( \ - (char*)(FN), LN, triton::common::LogMessage::Level::kWARNING) \ - .stream() -#define LOG_ERROR_FL(FN, LN) \ - if (LOG_ERROR_IS_ON) \ - triton::common::LogMessage( \ - (char*)(FN), LN, triton::common::LogMessage::Level::kERROR) \ - .stream() -#define LOG_VERBOSE_FL(L, FN, LN) \ - if (LOG_VERBOSE_IS_ON(L)) \ - triton::common::LogMessage( \ - (char*)(FN), LN, triton::common::LogMessage::Level::kINFO) \ - .stream() - -// Macros that use current filename and line number. 
-#define LOG_INFO LOG_INFO_FL(__FILE__, __LINE__) -#define LOG_WARNING LOG_WARNING_FL(__FILE__, __LINE__) -#define LOG_ERROR LOG_ERROR_FL(__FILE__, __LINE__) -#define LOG_VERBOSE(L) LOG_VERBOSE_FL(L, __FILE__, __LINE__) - - -#define LOG_STATUS_ERROR(X, MSG) \ - do { \ - const Status& status__ = (X); \ - if (!status__.IsOk()) { \ - LOG_ERROR << (MSG) << ": " << status__.AsString(); \ - } \ - } while (false) - -#define LOG_TRITONSERVER_ERROR(X, MSG) \ - do { \ - TRITONSERVER_Error* err__ = (X); \ - if (err__ != nullptr) { \ - LOG_ERROR << (MSG) << ": " << TRITONSERVER_ErrorCodeString(err__) \ - << " - " << TRITONSERVER_ErrorMessage(err__); \ - TRITONSERVER_ErrorDelete(err__); \ - } \ - } while (false) - -#define LOG_FLUSH triton::common::gLogger_.Flush() - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/model_config.h b/3rdparty/common-r22.12/include/triton/common/model_config.h deleted file mode 100644 index 468f678cb6676083f2669c67534eb11e9787673e..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/model_config.h +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include "model_config.pb.h" - -namespace triton { namespace common { - -/// The type for a repeated dims field (used for shape). -using DimsList = ::google::protobuf::RepeatedField<::google::protobuf::int64>; - -/// The type for the metric_tags map. -using MetricTagsMap = ::google::protobuf::Map; - -// Map from a host policy name to map of cmdline -// settings for the host policy. -using HostPolicyCmdlineConfig = std::map; -using HostPolicyCmdlineConfigMap = - std::unordered_map; - -// Map from backend name to list of setting=value pairs of cmdline -// settings for the backend. -using BackendCmdlineConfig = std::vector>; -using BackendCmdlineConfigMap = - std::unordered_map; - -/// The value for a dimension in a shape that indicates that that -/// dimension can take on any size. 
-constexpr int WILDCARD_DIM = -1; - -constexpr int SCHEDULER_DEFAULT_NICE = 5; - -/// Enumeration for the different platform types. -enum Platform { - PLATFORM_UNKNOWN = 0, - PLATFORM_TENSORRT_PLAN = 1, - PLATFORM_TENSORFLOW_GRAPHDEF = 2, - PLATFORM_TENSORFLOW_SAVEDMODEL = 3, - PLATFORM_ENSEMBLE = 4, - PLATFORM_ONNXRUNTIME_ONNX = 5, - PLATFORM_PYTORCH_LIBTORCH = 6 -}; - -/// Get the number of elements in a shape. -/// \param dims The shape. -/// \return The number of elements, or -1 if the number of elements -/// cannot be determined because the shape contains one or more -/// wilcard dimensions. -int64_t GetElementCount(const DimsList& dims); - -/// Get the number of elements in a shape. -/// \param dims The shape. -/// \return The number of elements, or -1 if the number of elements -/// cannot be determined because the shape contains one or more -/// wilcard dimensions. -int64_t GetElementCount(const std::vector& dims); - -/// Get the number of elements in the shape of a model input. -/// \param mio The model input. -/// \return The number of elements, or -1 if the number of elements -/// cannot be determined because the shape contains one or more -/// wilcard dimensions. -int64_t GetElementCount(const inference::ModelInput& mio); - -/// Get the number of elements in the shape of a model output. -/// \param mio The model output. -/// \return The number of elements, or -1 if the number of elements -/// cannot be determined because the shape contains one or more -/// wilcard dimensions. -int64_t GetElementCount(const inference::ModelOutput& mio); - -/// Are values of a datatype fixed-size, or variable-sized. -/// \param dtype The data-type. -/// \return True if datatype values are fixed-sized, false if -/// variable-sized. -bool IsFixedSizeDataType(const inference::DataType dtype); - -/// Get the size of objects of a given datatype in bytes. -/// \param dtype The data-type. -/// \return The size, in bytes, of objects of the datatype, or 0 if -/// size cannot be determine (for example, values of type TYPE_STRING -/// have variable length and so size cannot be determine just from the -/// type). -size_t GetDataTypeByteSize(const inference::DataType dtype); - -/// Get the size, in bytes, of a tensor based on datatype and -/// shape. -/// \param dtype The data-type. -/// \param dims The shape. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize(const inference::DataType& dtype, const DimsList& dims); - -/// Get the size, in bytes, of a tensor based on datatype and -/// shape. -/// \param dtype The data-type. -/// \param dims The shape. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize( - const inference::DataType& dtype, const std::vector& dims); - -/// Get the size, in bytes, of a tensor based on batch-size, datatype -/// and shape. A tensor that has empty shape [] and non-zero -/// batch-size is sized as a tensor with shape [ batch-size ]. -/// \param batch_size The batch-size. May be 0 to indicate no -/// batching. -/// \param dtype The data-type. -/// \param dims The shape. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize( - const int batch_size, const inference::DataType& dtype, - const DimsList& dims); - -/// Get the size, in bytes, of a tensor based on batch-size, datatype -/// and shape. 
A tensor that has empty shape [] and non-zero -/// batch-size is sized as a tensor with shape [ batch-size ]. -/// \param batch_size The batch-size. May be 0 to indicate no -/// batching. -/// \param dtype The data-type. -/// \param dims The shape. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize( - const int batch_size, const inference::DataType& dtype, - const std::vector& dims); - -/// Get the size, in bytes, of a tensor based on ModelInput. -/// \param mio The ModelInput protobuf. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize(const inference::ModelInput& mio); - -/// Get the size, in bytes, of a tensor based on ModelOutput. -/// \param mio The ModelOutput protobuf. -/// \return The size, in bytes, of the corresponding tensor, or -1 if -/// unable to determine the size. -int64_t GetByteSize(const inference::ModelOutput& mio); - -/// Get the CPU thread nice level associate with a model -/// configuration's priority. -/// \param config The model configuration. -/// \return The nice level. -int GetCpuNiceLevel(const inference::ModelConfig& config); - -/// Compare two model configuration shapes for equality. Wildcard -/// dimensions (that is, dimensions with size WILDCARD_DIM) are -/// compared literally so that to be equal the two shapes must both -/// specify WILDCARD_DIM in the same dimensions. -/// \params dims0 The first shape. -/// \params dims1 The second shape. -/// \return True if the shapes are equal, false if not equal. -bool CompareDims(const DimsList& dims0, const DimsList& dims1); - -/// Compare two model configuration shapes for equality. Wildcard -/// dimensions (that is, dimensions with size WILDCARD_DIM) are -/// compared literally so that to be equal the two shapes must both -/// specify WILDCARD_DIM in the same dimensions. -/// \params dims0 The first shape. -/// \params dims1 The second shape. -/// \return True if the shapes are equal, false if not equal. -bool CompareDims( - const std::vector& dims0, const std::vector& dims1); - -/// Compare two model configuration shapes for equality. Wildcard -/// dimensions (that is, dimensions with size WILDCARD_DIM) are -/// allowed to match with any value. So, a dimension in one shape -/// specified as WILDCARD_DIM will always match the same dimension in -/// the other shape. -/// \params dims0 The first shape. -/// \params dims1 The second shape. -/// \return True if the shapes are equal, false if not equal. -bool CompareDimsWithWildcard(const DimsList& dims0, const DimsList& dims1); - -/// Compare two model configuration shapes for equality. Wildcard -/// dimensions (that is, dimensions with size WILDCARD_DIM) are -/// allowed to match with any value. So, a dimension in one shape -/// specified as WILDCARD_DIM will always match the same dimension in -/// the other shape. -/// \params dims0 The first shape. -/// \params dims1 The second shape. -/// \return True if the shapes are equal, false if not equal. -bool CompareDimsWithWildcard( - const DimsList& dims0, const std::vector& dims1); - -/// Convert a DimsList to string representation. -/// \param dims The DimsList to be converted. -/// \return String representation of the DimsList in pattern -/// "[d0,d1,...,dn]" -std::string DimsListToString(const DimsList& dims); - -/// Convert a vector representing a shape to string representation. -/// \param dims The vector of dimensions to be converted. 
-/// \return String representation of the vector in pattern -/// "[d0,d1,...,dn]" -std::string DimsListToString( - const std::vector& dims, const int start_idx = 0); - -/// Get the server protocol string representation of a datatype. -/// \param dtype The data type. -/// \return The string representation. -const char* DataTypeToProtocolString(const inference::DataType dtype); - -/// Get the datatype corresponding to a server protocol string -/// representation of a datatype. -/// \param dtype string representation. -/// \return The data type. -inference::DataType ProtocolStringToDataType(const std::string& dtype); - -/// Get the datatype corresponding to a server protocol string -/// representation of a datatype. -/// \param dtype Pointer to string. -/// \param len Length of the string. -/// \return The data type. -inference::DataType ProtocolStringToDataType(const char* dtype, size_t len); - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/nvtx.h b/3rdparty/common-r22.12/include/triton/common/nvtx.h deleted file mode 100644 index 450736cc5ffb11cbed448f51f6f607caceb325ce..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/nvtx.h +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#ifdef TRITON_ENABLE_NVTX - -#include - -namespace triton { namespace common { - -// Updates a server stat with duration measured by a C++ scope. 
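// Illustrative sketch, referring back to the shape helpers declared in
// model_config.h above (the dimensions are placeholders, TYPE_FP32 is assumed
// to come from the generated model_config.pb.h, and elided template arguments
// are assumed to be int64_t).
#include <cstdint>
#include <string>
#include <vector>
#include "triton/common/model_config.h"

void InspectShapes()
{
  using namespace triton::common;

  // A wildcard dimension makes both quantities indeterminate (-1).
  const std::vector<int64_t> dynamic{1, WILDCARD_DIM, 224, 224};
  const int64_t elements = GetElementCount(dynamic);
  const int64_t bytes = GetByteSize(inference::TYPE_FP32, dynamic);

  // A fully specified shape prints as "[1,3,224,224]".
  const std::vector<int64_t> fixed{1, 3, 224, 224};
  const std::string printed = DimsListToString(fixed);

  (void)elements; (void)bytes; (void)printed;
}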
-class NvtxRange { - public: - explicit NvtxRange(const char* label) { nvtxRangePushA(label); } - - explicit NvtxRange(const std::string& label) : NvtxRange(label.c_str()) {} - - ~NvtxRange() { nvtxRangePop(); } -}; - -}} // namespace triton::common - -#endif // TRITON_ENABLE_NVTX - -// -// Macros to access NVTX functionality -// -#ifdef TRITON_ENABLE_NVTX -#define NVTX_INITIALIZE nvtxInitialize(nullptr) -#define NVTX_RANGE(V, L) triton::common::NvtxRange V(L) -#define NVTX_MARKER(L) nvtxMarkA(L) -#else -#define NVTX_INITIALIZE -#define NVTX_RANGE(V, L) -#define NVTX_MARKER(L) -#endif // TRITON_ENABLE_NVTX diff --git a/3rdparty/common-r22.12/include/triton/common/sync_queue.h b/3rdparty/common-r22.12/include/triton/common/sync_queue.h deleted file mode 100644 index 2ab7a40cf525f27c6f093c4741defaec81013fcd..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/sync_queue.h +++ /dev/null @@ -1,83 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include - -namespace triton { namespace common { - -// -// C++11 doesn't have a sync queue so we implement a simple one. 
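// Illustrative sketch, referring back to the NVTX macros defined in nvtx.h
// above (function and label names are placeholders). Without
// TRITON_ENABLE_NVTX both macros expand to nothing, so the annotations cost
// nothing.
#include "triton/common/nvtx.h"

void RunInferenceStep()
{
  NVTX_RANGE(nvtx_step, "RunInferenceStep");  // pushed here, popped at scope exit
  NVTX_MARKER("inference-step-begin");
  // ... work to be profiled ...
}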
-// -template -class SyncQueue { - public: - SyncQueue() {} - - bool Empty() - { - std::lock_guard lk(mu_); - return queue_.empty(); - } - - Item Get() - { - std::unique_lock lk(mu_); - if (queue_.empty()) { - cv_.wait(lk, [this] { return !queue_.empty(); }); - } - auto res = std::move(queue_.front()); - queue_.pop_front(); - return res; - } - - void Put(const Item& value) - { - { - std::lock_guard lk(mu_); - queue_.push_back(value); - } - cv_.notify_all(); - } - - void Put(Item&& value) - { - { - std::lock_guard lk(mu_); - queue_.push_back(std::move(value)); - } - cv_.notify_all(); - } - - private: - std::mutex mu_; - std::condition_variable cv_; - std::deque queue_; -}; - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/table_printer.h b/3rdparty/common-r22.12/include/triton/common/table_printer.h deleted file mode 100644 index 230e6c3043c1d74598805ab17891229d37bb54e0..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/table_printer.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include - -namespace triton { namespace common { - -// -// An ASCII table printer. -// -class TablePrinter { - public: - // Insert a row at the end of the table - void InsertRow(const std::vector& row); - - // Print the table - std::string PrintTable(); - - // TablePrinter will take the ownership of `headers`. - TablePrinter(const std::vector& headers); - - private: - // Update the `shares_` such that all the excess - // amount of space not used a column is fairly allocated - // to the other columns - void FairShare(); - - // Append a row to `table`. This function handles the cases where a wrapping - // occurs. 
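// Illustrative sketch, referring back to the SyncQueue defined in
// sync_queue.h above (the payload type and value are placeholders; the
// stripped template header is presumed to be `template <typename Item>`).
#include <string>
#include <thread>
#include "triton/common/sync_queue.h"

void ProduceAndConsume()
{
  triton::common::SyncQueue<std::string> queue;

  std::thread consumer([&queue] {
    const std::string item = queue.Get();  // blocks until Put() below
    (void)item;
  });

  queue.Put(std::string("payload"));  // wakes the waiting consumer
  consumer.join();
}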
- void AddRow(std::stringstream& table, size_t row_index); - - // Add a row divider - void AddRowDivider(std::stringstream& table); - - // Max row width - std::vector max_widths_; - - // Max row height - std::vector max_heights_; - - // A vector of vectors of vectors containing data items for every column - // The record is stored in a vector of string, where each of the vector items - // contains a single line from the record. For example, ["Item 1", "Item 2", - // "Item 3\n Item 3 line 2"] will be stored as [["Item 1"], ["Item 2"], ["Item - // 3", "Item 3 line 2"]] - std::vector>> data_; - - // Fair share of every column - std::vector shares_; -}; - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/thread_pool.h b/3rdparty/common-r22.12/include/triton/common/thread_pool.h deleted file mode 100644 index 787d4c1199e66d62d35f1af5e60497d1224035f0..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/thread_pool.h +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include - -namespace triton { namespace common { - -// Generic fixed-size Thread Pool to execute tasks asynchronously - -class ThreadPool { - public: - explicit ThreadPool(std::size_t thread_count); - ~ThreadPool(); - ThreadPool(const ThreadPool&) = delete; - ThreadPool& operator=(const ThreadPool&) = delete; - - using Task = std::function; - // Assigns "task" to the task queue for a worker thread to execute when - // available. This will not track the return value of the task. - void Enqueue(Task&& task); - // Returns the number of threads in thread pool - size_t Size() { return workers_.size(); } - - private: - std::queue task_queue_; - std::mutex queue_mtx_; - std::condition_variable cv_; - std::vector workers_; - // If true, tells pool to stop accepting work and tells awake worker threads - // to exit when no tasks are left on the queue. 
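// Illustrative sketch for the ThreadPool declared above (the thread count
// and task bodies are placeholders; the elided Task signature is assumed to
// be void()).
#include <atomic>
#include "triton/common/thread_pool.h"

void RunPooledTasks()
{
  std::atomic<int> completed{0};
  triton::common::ThreadPool pool(2 /* thread_count */);

  for (int i = 0; i < 4; ++i) {
    pool.Enqueue([&completed] { completed.fetch_add(1); });  // fire-and-forget
  }
  // Destruction is expected to stop intake and join the workers once the
  // queue is empty.
}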
- bool stop_ = false; -}; - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/include/triton/common/triton_json.h b/3rdparty/common-r22.12/include/triton/common/triton_json.h deleted file mode 100644 index 68d1cc39aee457726358990f736501701d43d2fc..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/include/triton/common/triton_json.h +++ /dev/null @@ -1,1119 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#ifdef _WIN32 -// Remove GetObject definition from windows.h, which prevents calls to -// RapidJSON's GetObject. -// https://github.com/Tencent/rapidjson/issues/1448 -#undef GetObject -#include -#else -// Disable class-memaccess warning to facilitate compilation with gcc>7 -// https://github.com/Tencent/rapidjson/issues/1700 -#pragma GCC diagnostic push -#if defined(__GNUC__) && __GNUC__ >= 8 -#pragma GCC diagnostic ignored "-Wclass-memaccess" -#endif -#include -#pragma GCC diagnostic pop -#endif // _WIN32 - -#include // CrtAllocator (default) for Writer instantiation -#include // UTF8 (default) for Writer instantiation -#include -#include -#include -#include -#include -#include -#include - -// This header can be used both within Triton server and externally -// (i.e. in source that interacts only via TRITONSERVER or -// TRITONBACKEND API). Status is handled differently in these cases so -// the following macros must be defined before including this -// header. As an example the defines are shown here as returned by the -// TRITONSERVER API. -// -// #define TRITONJSON_STATUSTYPE TRITONSERVER_Error* -// #define TRITONJSON_STATUSRETURN(M) -// return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (M).c_str()) -// #define TRITONJSON_STATUSSUCCESS nullptr - -namespace triton { namespace common { - -// -// A JSON parser/writer. Currently based on rapidjson but the intent -// is to provide an abstraction for JSON functions that make it easy -// to substitute a different JSON parser. 
Specifically for rapidjson -// the class is also designed to provide safe access and error -// reporting to avoid the cases where rapidjson would just abort the -// entire application (!). -// -class TritonJson { - public: - class Value; - enum class ValueType { - OBJECT = rapidjson::kObjectType, - ARRAY = rapidjson::kArrayType, - }; - - // - // Buffer used when writing JSON representation. - // - class WriteBuffer { - public: - // Get buffer base address. - const char* Base() const { return buffer_.c_str(); } - - // Get a reference to the buffer itself. Useful to efficiently - // move the contents out of the buffer. - std::string& MutableContents() { return buffer_; } - - // Immutable contents. - const std::string& Contents() const { return buffer_; } - - // Interface required by rapidjson::Writer - typedef char Ch; - void Put(char c) { buffer_.push_back(c); } - void Clear() { buffer_.clear(); } - void Flush() { return; } - size_t Size() const { return buffer_.size(); } - - private: - std::string buffer_; - }; - - // - // Value representing the entire document or an element within a - // document. - // - class Value { - public: - // Empty value. Will become a top-level Document value if - // initialized by parsing or a non-top-level value if initialized - // any other way. - explicit Value() : value_(nullptr), allocator_(nullptr) {} - - // Construct a top-level JSON document. - explicit Value(const ValueType type) - : document_(static_cast(type)), value_(nullptr), - allocator_(&document_.GetAllocator()) - { - } - - // Construct a non-top-level JSON value in a 'document'. - explicit Value(TritonJson::Value& document, const ValueType type) - { - allocator_ = &document.document_.GetAllocator(); - value_ = new (allocator_->Malloc(sizeof(rapidjson::Value))) - rapidjson::Value(static_cast(type)); - } - - // Move constructor. - explicit Value(Value&& other) { *this = std::move(other); } - - // Move assignment operator. - Value& operator=(Value&& other) - { - document_ = std::move(other.document_); - value_ = other.value_; - allocator_ = other.allocator_; - other.value_ = nullptr; - other.allocator_ = nullptr; - return *this; - } - - // Parse JSON into document. Can only be called on top-level - // document value, otherwise error is returned. - TRITONJSON_STATUSTYPE Parse(const char* base, const size_t size) - { - if (value_ != nullptr) { - TRITONJSON_STATUSRETURN( - std::string("JSON parsing only available for top-level document")); - } - const unsigned int parseFlags = rapidjson::kParseNanAndInfFlag; - document_.Parse(base, size); - if (document_.HasParseError()) { - TRITONJSON_STATUSRETURN(std::string( - "failed to parse the request JSON buffer: " + - std::string(GetParseError_En(document_.GetParseError())) + " at " + - std::to_string(document_.GetErrorOffset()))); - } - allocator_ = &document_.GetAllocator(); - return TRITONJSON_STATUSSUCCESS; - } - - // \see Parse(const char* base, const size_t size) - TRITONJSON_STATUSTYPE Parse(const std::string& json) - { - return Parse(json.data(), json.size()); - } - - // Write JSON representation into a 'buffer' in a compact - // format. Can only be called for a top-level document value, - // otherwise error is returned. 
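// Illustrative sketch of parsing with the class above when used standalone.
// A minimal mapping of the status macros onto std::string is assumed here;
// the TRITONSERVER_Error* mapping shown in the comment near the top of this
// header is what the server itself uses. The JSON content is a placeholder.
#include <string>

#define TRITONJSON_STATUSTYPE std::string
#define TRITONJSON_STATUSRETURN(M) return (M)
#define TRITONJSON_STATUSSUCCESS std::string()
#include "triton/common/triton_json.h"

std::string ParseModelConfig(const std::string& json)
{
  triton::common::TritonJson::Value doc;
  const std::string err = doc.Parse(json);
  if (!err.empty()) {
    return err;  // parse failure message with offset information
  }
  return TRITONJSON_STATUSSUCCESS;  // empty string on success
}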
- TRITONJSON_STATUSTYPE Write(WriteBuffer* buffer) const - { - if (value_ != nullptr) { - TRITONJSON_STATUSRETURN( - std::string("JSON writing only available for top-level document")); - } - const unsigned int writeFlags = rapidjson::kWriteNanAndInfFlag; - // Provide default template arguments to pass writeFlags - rapidjson::Writer< - WriteBuffer, rapidjson::UTF8<>, rapidjson::UTF8<>, - rapidjson::CrtAllocator, writeFlags> - writer(*buffer); - if (!document_.Accept(writer)) { - TRITONJSON_STATUSRETURN( - std::string("Failed to accept document, invalid JSON.")); - } - return TRITONJSON_STATUSSUCCESS; - } - - // Write JSON representation into a 'buffer' in an easy-to-read - // format. Can only be called for a top-level document value, - // otherwise error is returned. - TRITONJSON_STATUSTYPE PrettyWrite(WriteBuffer* buffer) const - { - if (value_ != nullptr) { - TRITONJSON_STATUSRETURN( - std::string("JSON writing only available for top-level document")); - } - - // Can't pass writeFlags with latest release v1.1.0 of rapidjson-dev. - // We would need to build rapidjson from source to capture latest fixes. - // See this issue: - // https://github.com/Tencent/rapidjson/issues/905#issuecomment-370981353 - // PrettyWrite is only used for displaying model configs currently, so - // this should not be an issue. - rapidjson::PrettyWriter writer(*buffer); - if (!document_.Accept(writer)) { - TRITONJSON_STATUSRETURN( - std::string("Failed to accept document, invalid JSON.")); - } - return TRITONJSON_STATUSSUCCESS; - } - - // Swap a value with another. - TRITONJSON_STATUSTYPE Swap(TritonJson::Value& other) - { - rapidjson::Value& value = AsMutableValue(); - value.Swap(other.AsMutableValue()); - return TRITONJSON_STATUSSUCCESS; - } - - // FIXME Should have Set* for all types. - - // Set/overwrite a signed integer in a value. This changes the - // type of the value to signed int. - TRITONJSON_STATUSTYPE SetInt(const int64_t value) - { - rapidjson::Value& v = AsMutableValue(); - v.SetInt64(value); - return TRITONJSON_STATUSSUCCESS; - } - - // Set/overwrite a string in a value. This changes the - // type of the value to string - TRITONJSON_STATUSTYPE SetString(const std::string& value) - { - rapidjson::Value& v = AsMutableValue(); - v.SetString(value.c_str(), value.length(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Set/overwrite a string member with provided name and value in this object - TRITONJSON_STATUSTYPE SetStringObject( - const char* name, const std::string& value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add/replace JSON member '") + name + - "' to non-object"); - } - auto itr = object.FindMember(name); - if (itr == object.MemberEnd()) { - AddString(name, value); - } else { - object.RemoveMember(itr); - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value.c_str(), value.size(), *allocator_), - *allocator_); - } - - return TRITONJSON_STATUSSUCCESS; - } - - // Add an array or object as a new member to this value. 'value' - // is moved into this value and so on return 'value' should not be - // used. It is assumed that 'name' can be used by reference, it is - // the caller's responsibility to make sure the lifetime of 'name' - // extends at least as long as the object. 
- TRITONJSON_STATUSTYPE Add(const char* name, TritonJson::Value&& value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - if (value.value_ == nullptr) { - rapidjson::Value v2; - v2.CopyFrom(value.document_, *allocator_); - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), v2.Move(), - *allocator_); - } else { - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - value.value_->Move(), *allocator_); - } - value.Release(); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a copy of a string as a new member to this value. It is - // assumed that 'name' can be used by reference, it is the - // caller's responsibility to make sure the lifetime of 'name' - // extends at least as long as the object. - TRITONJSON_STATUSTYPE AddString(const char* name, const std::string& value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value.c_str(), value.size(), *allocator_).Move(), - *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a copy of a explicit-length string as a new member to this - // value. It is assumed that 'name' can be used by reference, it - // is the caller's responsibility to make sure the lifetime of - // 'name' extends at least as long as the object. - TRITONJSON_STATUSTYPE AddString( - const char* name, const char* value, const size_t len) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value, len, *allocator_).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a reference to a string as a new member to this value. It - // is assumed that 'name' and 'value' can be used by reference, it - // is the caller's responsibility to make sure the lifetime of - // 'name' and 'value' extend at least as long as the object. - TRITONJSON_STATUSTYPE AddStringRef(const char* name, const char* value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::StringRef(value), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a reference to a expicit-length string as a new member to - // this value. It is assumed that 'name' and 'value' can be used - // by reference, it is the caller's responsibility to make sure - // the lifetime of 'name' and 'value' extend at least as long as - // the object. - TRITONJSON_STATUSTYPE AddStringRef( - const char* name, const char* value, const size_t len) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::StringRef(value, len), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a boolean new member to this value. 
It is assumed that - // 'name' can be used by reference, it is the caller's - // responsibility to make sure the lifetime of 'name' extends at - // least as long as the object. - TRITONJSON_STATUSTYPE AddBool(const char* name, const bool value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a signed integer as a new member to this value. It is - // assumed that 'name' can be used by reference, it is the - // caller's responsibility to make sure the lifetime of 'name' - // extends at least as long as the object. - TRITONJSON_STATUSTYPE AddInt(const char* name, const int64_t value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add an unsigned integer as a new member to this value. It is - // assumed that 'name' can be used by reference, it is the - // caller's responsibility to make sure the lifetime of 'name' - // extends at least as long as the object. - TRITONJSON_STATUSTYPE AddUInt(const char* name, const uint64_t value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Add a double as a new member to this value. It is assumed that - // 'name' can be used by reference, it is the caller's - // responsibility to make sure the lifetime of 'name' extends at - // least as long as the object. - TRITONJSON_STATUSTYPE AddDouble(const char* name, const double value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to add JSON member '") + name + - "' to non-object"); - } - object.AddMember( - rapidjson::Value(rapidjson::StringRef(name)).Move(), - rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append an array or object to this value, which must be an - // array. 'value' is moved into this value and so on return - // 'value' should not be used. - TRITONJSON_STATUSTYPE Append(TritonJson::Value&& value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - if (value.value_ == nullptr) { - rapidjson::Value v2; - v2.CopyFrom(value.document_, *allocator_); - array.PushBack(v2.Move(), *allocator_); - } else { - array.PushBack(value.value_->Move(), *allocator_); - } - - value.Release(); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a copy of a string to this value, which must be an - // array. 
- TRITONJSON_STATUSTYPE AppendString(const std::string& value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - array.PushBack( - rapidjson::Value(value.c_str(), value.size(), *allocator_).Move(), - *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a copy of an explicit-length string to this value, which - // must be an array. - TRITONJSON_STATUSTYPE AppendString(const char* value, const size_t len) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - array.PushBack( - rapidjson::Value(value, len, *allocator_).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a reference to a string to this value, which must be an - // array. It is assumed that 'value' can be used by reference, it - // is the caller's responsibility to make sure the lifetime of - // 'value' extends at least as long as the object. - TRITONJSON_STATUSTYPE AppendStringRef(const char* value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - array.PushBack(rapidjson::StringRef(value), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a reference to a expicit-length string to this value, - // which must be an array. It is assumed that 'value' can be used - // by reference, it is the caller's responsibility to make sure - // the lifetime of 'value' extends at least as long as the object. - TRITONJSON_STATUSTYPE AppendStringRef(const char* value, const size_t len) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - array.PushBack(rapidjson::StringRef(value, len), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a boolean to this value, which must be an array. - TRITONJSON_STATUSTYPE AppendBool(const bool value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - - array.PushBack(rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a signed integer to this value, which must be an array. - TRITONJSON_STATUSTYPE AppendInt(const int64_t value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - - array.PushBack(rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append an unsigned integer to this value, which must be an - // array. - TRITONJSON_STATUSTYPE AppendUInt(const uint64_t value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - - array.PushBack(rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Append a double to this value, which must be an array. 
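Editor's note: a sketch of how the Append* members above combine with Add() to build a nested array; illustrative only, not part of the deleted header, again assuming the Value(ValueType) constructor and a "triton_json.h" include path, and ignoring the returned status codes.

```cpp
#include <utility>

#include "triton_json.h"  // assumed include path for this header

int main()
{
  namespace tc = triton::common;

  // Build [3, 224, 224] as a standalone top-level array value.
  tc::TritonJson::Value dims(tc::TritonJson::ValueType::ARRAY);
  dims.AppendInt(3);
  dims.AppendInt(224);
  dims.AppendInt(224);

  // Attach it to an object document. As shown above, Add() copies a
  // top-level value (the value_ == nullptr branch) or moves an element
  // that already belongs to this document.
  tc::TritonJson::Value config(tc::TritonJson::ValueType::OBJECT);
  config.AddString("name", "input0");  // hypothetical member
  config.Add("dims", std::move(dims));
  return 0;
}
```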
- TRITONJSON_STATUSTYPE AppendDouble(const double value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to append JSON member to non-array")); - } - - array.PushBack(rapidjson::Value(value).Move(), *allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Remove a member from this object. - TRITONJSON_STATUSTYPE Remove(const char* name) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to remove JSON member '") + name + - "' from non-object"); - } - auto itr = object.FindMember(name); - if (itr != object.MemberEnd()) { - object.RemoveMember(itr); - } // else report success - - return TRITONJSON_STATUSSUCCESS; - } - - // Check if this value is of the specified type. Return appropriate - // error if not. - TRITONJSON_STATUSTYPE AssertType(TritonJson::ValueType type) const - { - if (static_cast<rapidjson::Type>(type) != AsValue().GetType()) { - TRITONJSON_STATUSRETURN(std::string("unexpected type")); - } - return TRITONJSON_STATUSSUCCESS; - } - - // Get the size of an array. If called on non-array returns zero. - size_t ArraySize() const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray()) { - return 0; - } - return array.GetArray().Size(); - } - - // Return the specified index contained in this array. - TRITONJSON_STATUSTYPE At( - const size_t idx, TritonJson::Value* value = nullptr) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - *value = TritonJson::Value(array[idx], allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Get the names of all members in an object. Error if value is - // not an object. - TRITONJSON_STATUSTYPE Members(std::vector<std::string>* names) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to get members for non-object")); - } - for (const auto& m : object.GetObject()) { - names->push_back(m.name.GetString()); - } - return TRITONJSON_STATUSSUCCESS; - } - - // Return true if this value is an object and the named member is - // contained in this object. - bool Find(const char* name) const - { - const rapidjson::Value& object = AsValue(); - return object.IsObject() && object.HasMember(name); - } - - // Return true if this value is an object and the named member is - // contained in this object. Return the member in 'value'. - bool Find(const char* name, TritonJson::Value* value) - { - rapidjson::Value& object = AsMutableValue(); - if (object.IsObject() && object.HasMember(name)) { - if (value != nullptr) { - *value = TritonJson::Value(object[name], allocator_); - } - return true; - } - - return false; - } - - // Whether the object is a null value. Note that false will also be returned - // if the object is not a JSON value. - bool IsNull() const { return ((value_ != nullptr) && value_->IsNull()); } - - // Return true if the object is an object and it has no members; - // false otherwise. - bool IsEmpty() const - { - const rapidjson::Value& object = AsValue(); - if (object.IsObject() && object.MemberCount() == 0) { - return true; - } - return false; - } - - // Get value as a string. The string may contain null or other - // special characters and so 'len' must be used to determine length. - // Error if value is not a string.
- TRITONJSON_STATUSTYPE AsString(const char** value, size_t* len) const - { - if ((value_ == nullptr) || !value_->IsString()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-string as string")); - } - *value = value_->GetString(); - *len = value_->GetStringLength(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get value as a string. The string may contain null or other - // special characters. Error if value is not a string. - TRITONJSON_STATUSTYPE AsString(std::string* str) const - { - if ((value_ == nullptr) || !value_->IsString()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-string as string")); - } - str->assign(value_->GetString(), value_->GetStringLength()); - return TRITONJSON_STATUSSUCCESS; - } - - // Get value as a boolean. Error if value is not a boolean. - TRITONJSON_STATUSTYPE AsBool(bool* value) const - { - if ((value_ == nullptr) || !value_->IsBool()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-boolean as boolean")); - } - *value = value_->GetBool(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get value as a signed integer. Error if value is not a signed - // integer. - TRITONJSON_STATUSTYPE AsInt(int64_t* value) const - { - if ((value_ == nullptr) || !value_->IsInt64()) { - TRITONJSON_STATUSRETURN(std::string( - "attempt to access JSON non-signed-integer as signed-integer")); - } - *value = value_->GetInt64(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get value as an unsigned integer. Error if value is not an - // unsigned integer. - TRITONJSON_STATUSTYPE AsUInt(uint64_t* value) const - { - if ((value_ == nullptr) || !value_->IsUint64()) { - TRITONJSON_STATUSRETURN(std::string( - "attempt to access JSON non-unsigned-integer as unsigned-integer")); - } - *value = value_->GetUint64(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get value as a double. Error if value is not a double. - TRITONJSON_STATUSTYPE AsDouble(double* value) const - { - if ((value_ == nullptr) || !value_->IsNumber()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-number as double")); - } - *value = value_->GetDouble(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get named array member contained in this object. - TRITONJSON_STATUSTYPE MemberAsArray( - const char* name, TritonJson::Value* value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - auto& v = object[name]; - if (!v.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-array as array")); - } - *value = TritonJson::Value(v, allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Get named object member contained in this object. - TRITONJSON_STATUSTYPE MemberAsObject( - const char* name, TritonJson::Value* value) - { - rapidjson::Value& object = AsMutableValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - auto& v = object[name]; - if (!v.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-object as object")); - } - *value = TritonJson::Value(v, allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object member as a string. The string may contain null or other - // special characters and so 'len' must be used to determine length. 
- // Error if this is not an object or if the member is not a string. - TRITONJSON_STATUSTYPE MemberAsString( - const char* name, const char** value, size_t* len) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - const auto& v = object[name]; - if (!v.IsString()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-string as string")); - } - *value = v.GetString(); - *len = v.GetStringLength(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object member as a string. The string may contain null or - // other special characters. Error if this is not an object or if - // the member is not a string. - TRITONJSON_STATUSTYPE MemberAsString( - const char* name, std::string* str) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - const auto& v = object[name]; - if (!v.IsString()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-string as string")); - } - str->assign(v.GetString(), v.GetStringLength()); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object member as a boolean. Error if this is not an object - // or if the member is not a boolean. - TRITONJSON_STATUSTYPE MemberAsBool(const char* name, bool* value) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - const auto& v = object[name]; - if (!v.IsBool()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-boolean as boolean")); - } - *value = v.GetBool(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object member as a signed integer. Error if this is not an object - // or if the member is not a signed integer. - TRITONJSON_STATUSTYPE MemberAsInt(const char* name, int64_t* value) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - const auto& v = object[name]; - if (!v.IsInt64()) { - TRITONJSON_STATUSRETURN(std::string( - "attempt to access JSON non-signed-integer as signed-integer")); - } - *value = v.GetInt64(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object member as an unsigned integer. Error if this is not an object - // or if the member is not an unsigned integer. - TRITONJSON_STATUSTYPE MemberAsUInt(const char* name, uint64_t* value) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - const auto& v = object[name]; - if (!v.IsUint64()) { - TRITONJSON_STATUSRETURN(std::string( - "attempt to access JSON non-unsigned-integer as unsigned-integer")); - } - *value = v.GetUint64(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object member as a double. Error if this is not an object - // or if the member is not a double. 
- TRITONJSON_STATUSTYPE MemberAsDouble(const char* name, double* value) const - { - const rapidjson::Value& object = AsValue(); - if (!object.IsObject() || !object.HasMember(name)) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing object member '") + - name + "'"); - } - const auto& v = object[name]; - if (!v.IsNumber()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-number as double")); - } - *value = v.GetDouble(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array element at a given index within this array. - TRITONJSON_STATUSTYPE IndexAsArray( - const size_t idx, TritonJson::Value* value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - auto& v = array[idx]; - if (!v.IsArray()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-array as array")); - } - *value = TritonJson::Value(v, allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Get object element at a given index within this array. - TRITONJSON_STATUSTYPE IndexAsObject( - const size_t idx, TritonJson::Value* value) - { - rapidjson::Value& array = AsMutableValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - auto& v = array[idx]; - if (!v.IsObject()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-object as object")); - } - *value = TritonJson::Value(v, allocator_); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array index as a string. The string may contain null or - // other special characters and so 'len' must be used to determine - // length. Error if this is not an array or if the index element - // is not a string. - TRITONJSON_STATUSTYPE IndexAsString( - const size_t idx, const char** value, size_t* len) const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - const auto& v = array[idx]; - if (!v.IsString()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-string as string")); - } - *value = v.GetString(); - *len = v.GetStringLength(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array index as a string. The string may contain null or - // other special characters. Error if this is not an array or if - // the index element is not a string. - TRITONJSON_STATUSTYPE IndexAsString( - const size_t idx, std::string* str) const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - const auto& v = array[idx]; - if (!v.IsString()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-string as string")); - } - str->assign(v.GetString(), v.GetStringLength()); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array index as a boolean. Error if this is not an array or - // if the index element is not a boolean. 
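Editor's note: the member and index accessors above and below pair naturally with Parse(), which is declared earlier in this header (outside this hunk). The sketch below is illustrative only, assumes a "triton_json.h" include path and a Parse(const std::string&) overload, and ignores the returned status codes that real code should check.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>

#include "triton_json.h"  // assumed include path for this header

int main()
{
  namespace tc = triton::common;

  // Parse() (declared earlier in the header) fills the top-level document.
  tc::TritonJson::Value doc;
  doc.Parse(std::string(R"({"name":"resnet50","dims":[3,224,224]})"));

  std::string name;
  doc.MemberAsString("name", &name);  // -> "resnet50"

  tc::TritonJson::Value dims;
  if (doc.Find("dims", &dims)) {
    for (size_t i = 0; i < dims.ArraySize(); ++i) {
      int64_t d = 0;
      dims.IndexAsInt(i, &d);  // declared just below
      std::cout << name << " dim " << i << " = " << d << std::endl;
    }
  }
  return 0;
}
```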
- TRITONJSON_STATUSTYPE IndexAsBool(const size_t idx, bool* value) const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - const auto& v = array[idx]; - if (!v.IsBool()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-boolean as boolean")); - } - *value = v.GetBool(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array index as a signed integer. Error if this is not an array or - // if the index element is not a signed integer. - TRITONJSON_STATUSTYPE IndexAsInt(const size_t idx, int64_t* value) const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - const auto& v = array[idx]; - if (!v.IsInt64()) { - TRITONJSON_STATUSRETURN(std::string( - "attempt to access JSON non-signed-integer as signed-integer")); - } - *value = v.GetInt64(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array index as an unsigned integer. Error if this is not an array or - // if the index element is not an unsigned integer. - TRITONJSON_STATUSTYPE IndexAsUInt(const size_t idx, uint64_t* value) const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - const auto& v = array[idx]; - if (!v.IsUint64()) { - TRITONJSON_STATUSRETURN(std::string( - "attempt to access JSON non-unsigned-integer as unsigned-integer")); - } - *value = v.GetUint64(); - return TRITONJSON_STATUSSUCCESS; - } - - // Get array index as a double. Error if this is not an array or - // if the index element is not a double. - TRITONJSON_STATUSTYPE IndexAsDouble(const size_t idx, double* value) const - { - const rapidjson::Value& array = AsValue(); - if (!array.IsArray() || (idx >= array.GetArray().Size())) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access non-existing array index '") + - std::to_string(idx) + "'"); - } - const auto& v = array[idx]; - if (!v.IsNumber()) { - TRITONJSON_STATUSRETURN( - std::string("attempt to access JSON non-number as double")); - } - *value = v.GetDouble(); - return TRITONJSON_STATUSSUCCESS; - } - - // Release/clear a value. - void Release() - { - if (value_ != nullptr) { - allocator_->Free(value_); - } - } - - private: - // Construct a non-top-level JSON value that references an - // existing element in a document. - explicit Value( - rapidjson::Value& v, rapidjson::Document::AllocatorType* allocator) - : value_(&v), allocator_(allocator) - { - } - - // Return a value object that can be used for both a top-level - // document as well as an element within a document. - const rapidjson::Value& AsValue() const - { - if (value_ == nullptr) { - return document_; - } - return *value_; - } - - rapidjson::Value& AsMutableValue() - { - if (value_ == nullptr) { - return document_; - } - return *value_; - } - - // If this object a document or value. Based on this only one or - // document_ or value_ is valid. 
- rapidjson::Document document_; - rapidjson::Value* value_; - rapidjson::Document::AllocatorType* allocator_; - }; -}; - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/protobuf/grpc_service.proto b/3rdparty/common-r22.12/protobuf/grpc_service.proto deleted file mode 100644 index b86ba13d4aad9c40a7c58aa530091cd48256607d..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/protobuf/grpc_service.proto +++ /dev/null @@ -1,1699 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -syntax = "proto3"; - -package inference; - -//@@.. cpp:namespace:: inference - -import "model_config.proto"; - -//@@ -//@@.. cpp:var:: service InferenceService -//@@ -//@@ Inference Server GRPC endpoints. -//@@ -service GRPCInferenceService -{ - //@@ .. cpp:var:: rpc ServerLive(ServerLiveRequest) returns - //@@ (ServerLiveResponse) - //@@ - //@@ Check liveness of the inference server. - //@@ - rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} - - //@@ .. cpp:var:: rpc ServerReady(ServerReadyRequest) returns - //@@ (ServerReadyResponse) - //@@ - //@@ Check readiness of the inference server. - //@@ - rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} - - //@@ .. cpp:var:: rpc ModelReady(ModelReadyRequest) returns - //@@ (ModelReadyResponse) - //@@ - //@@ Check readiness of a model in the inference server. - //@@ - rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} - - //@@ .. cpp:var:: rpc ServerMetadata(ServerMetadataRequest) returns - //@@ (ServerMetadataResponse) - //@@ - //@@ Get server metadata. - //@@ - rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} - - //@@ .. cpp:var:: rpc ModelMetadata(ModelMetadataRequest) returns - //@@ (ModelMetadataResponse) - //@@ - //@@ Get model metadata. - //@@ - rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} - - //@@ .. 
cpp:var:: rpc ModelInfer(ModelInferRequest) returns - //@@ (ModelInferResponse) - //@@ - //@@ Perform inference using a specific model. - //@@ - rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} - - //@@ .. cpp:var:: rpc ModelStreamInfer(stream ModelInferRequest) returns - //@@ (stream ModelStreamInferResponse) - //@@ - //@@ Perform streaming inference. - //@@ - rpc ModelStreamInfer(stream ModelInferRequest) - returns (stream ModelStreamInferResponse) - { - } - - //@@ .. cpp:var:: rpc ModelConfig(ModelConfigRequest) returns - //@@ (ModelConfigResponse) - //@@ - //@@ Get model configuration. - //@@ - rpc ModelConfig(ModelConfigRequest) returns (ModelConfigResponse) {} - - //@@ .. cpp:var:: rpc ModelStatistics( - //@@ ModelStatisticsRequest) - //@@ returns (ModelStatisticsResponse) - //@@ - //@@ Get the cumulative inference statistics for a model. - //@@ - rpc ModelStatistics(ModelStatisticsRequest) returns (ModelStatisticsResponse) - { - } - - //@@ .. cpp:var:: rpc RepositoryIndex(RepositoryIndexRequest) returns - //@@ (RepositoryIndexResponse) - //@@ - //@@ Get the index of model repository contents. - //@@ - rpc RepositoryIndex(RepositoryIndexRequest) returns (RepositoryIndexResponse) - { - } - - //@@ .. cpp:var:: rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns - //@@ (RepositoryModelLoadResponse) - //@@ - //@@ Load or reload a model from a repository. - //@@ - rpc RepositoryModelLoad(RepositoryModelLoadRequest) - returns (RepositoryModelLoadResponse) - { - } - - //@@ .. cpp:var:: rpc RepositoryModelUnload(RepositoryModelUnloadRequest) - //@@ returns (RepositoryModelUnloadResponse) - //@@ - //@@ Unload a model. - //@@ - rpc RepositoryModelUnload(RepositoryModelUnloadRequest) - returns (RepositoryModelUnloadResponse) - { - } - - //@@ .. cpp:var:: rpc SystemSharedMemoryStatus( - //@@ SystemSharedMemoryStatusRequest) - //@@ returns (SystemSharedMemoryStatusRespose) - //@@ - //@@ Get the status of all registered system-shared-memory regions. - //@@ - rpc SystemSharedMemoryStatus(SystemSharedMemoryStatusRequest) - returns (SystemSharedMemoryStatusResponse) - { - } - - //@@ .. cpp:var:: rpc SystemSharedMemoryRegister( - //@@ SystemSharedMemoryRegisterRequest) - //@@ returns (SystemSharedMemoryRegisterResponse) - //@@ - //@@ Register a system-shared-memory region. - //@@ - rpc SystemSharedMemoryRegister(SystemSharedMemoryRegisterRequest) - returns (SystemSharedMemoryRegisterResponse) - { - } - - //@@ .. cpp:var:: rpc SystemSharedMemoryUnregister( - //@@ SystemSharedMemoryUnregisterRequest) - //@@ returns (SystemSharedMemoryUnregisterResponse) - //@@ - //@@ Unregister a system-shared-memory region. - //@@ - rpc SystemSharedMemoryUnregister(SystemSharedMemoryUnregisterRequest) - returns (SystemSharedMemoryUnregisterResponse) - { - } - - //@@ .. cpp:var:: rpc CudaSharedMemoryStatus( - //@@ CudaSharedMemoryStatusRequest) - //@@ returns (CudaSharedMemoryStatusRespose) - //@@ - //@@ Get the status of all registered CUDA-shared-memory regions. - //@@ - rpc CudaSharedMemoryStatus(CudaSharedMemoryStatusRequest) - returns (CudaSharedMemoryStatusResponse) - { - } - - //@@ .. cpp:var:: rpc CudaSharedMemoryRegister( - //@@ CudaSharedMemoryRegisterRequest) - //@@ returns (CudaSharedMemoryRegisterResponse) - //@@ - //@@ Register a CUDA-shared-memory region. - //@@ - rpc CudaSharedMemoryRegister(CudaSharedMemoryRegisterRequest) - returns (CudaSharedMemoryRegisterResponse) - { - } - - //@@ .. 
cpp:var:: rpc CudaSharedMemoryUnregister( - //@@ CudaSharedMemoryUnregisterRequest) - //@@ returns (CudaSharedMemoryUnregisterResponse) - //@@ - //@@ Unregister a CUDA-shared-memory region. - //@@ - rpc CudaSharedMemoryUnregister(CudaSharedMemoryUnregisterRequest) - returns (CudaSharedMemoryUnregisterResponse) - { - } - - //@@ .. cpp:var:: rpc TraceSetting(TraceSettingRequest) - //@@ returns (TraceSettingResponse) - //@@ - //@@ Update and get the trace setting of the Triton server. - //@@ - rpc TraceSetting(TraceSettingRequest) returns (TraceSettingResponse) - { - } - - //@@ .. cpp:var:: rpc LogSettings(LogSettingsRequest) - //@@ returns (LogSettingsResponse) - //@@ - //@@ Update and get the log settings of the Triton server. - //@@ - rpc LogSettings(LogSettingsRequest) returns (LogSettingsResponse) - { - } -} - -//@@ -//@@.. cpp:var:: message ServerLiveRequest -//@@ -//@@ Request message for ServerLive. -//@@ -message ServerLiveRequest {} - -//@@ -//@@.. cpp:var:: message ServerLiveResponse -//@@ -//@@ Response message for ServerLive. -//@@ -message ServerLiveResponse -{ - //@@ - //@@ .. cpp:var:: bool live - //@@ - //@@ True if the inference server is live, false it not live. - //@@ - bool live = 1; -} - -//@@ -//@@.. cpp:var:: message ServerReadyRequest -//@@ -//@@ Request message for ServerReady. -//@@ -message ServerReadyRequest {} - -//@@ -//@@.. cpp:var:: message ServerReadyResponse -//@@ -//@@ Response message for ServerReady. -//@@ -message ServerReadyResponse -{ - //@@ - //@@ .. cpp:var:: bool ready - //@@ - //@@ True if the inference server is ready, false it not ready. - //@@ - bool ready = 1; -} - -//@@ -//@@.. cpp:var:: message ModelReadyRequest -//@@ -//@@ Request message for ModelReady. -//@@ -message ModelReadyRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model to check for readiness. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model to check for readiness. If not given the - //@@ server will choose a version based on the model and internal policy. - //@@ - string version = 2; -} - -//@@ -//@@.. cpp:var:: message ModelReadyResponse -//@@ -//@@ Response message for ModelReady. -//@@ -message ModelReadyResponse -{ - //@@ - //@@ .. cpp:var:: bool ready - //@@ - //@@ True if the model is ready, false it not ready. - //@@ - bool ready = 1; -} - -//@@ -//@@.. cpp:var:: message ServerMetadataRequest -//@@ -//@@ Request message for ServerMetadata. -//@@ -message ServerMetadataRequest {} - -//@@ -//@@.. cpp:var:: message ServerMetadataResponse -//@@ -//@@ Response message for ServerMetadata. -//@@ -message ServerMetadataResponse -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The server name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string version - //@@ - //@@ The server version. - //@@ - string version = 2; - - //@@ - //@@ .. cpp:var:: string extensions (repeated) - //@@ - //@@ The extensions supported by the server. - //@@ - repeated string extensions = 3; -} - -//@@ -//@@.. cpp:var:: message ModelMetadataRequest -//@@ -//@@ Request message for ModelMetadata. -//@@ -message ModelMetadataRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model to check for readiness. If not - //@@ given the server will choose a version based on the - //@@ model and internal policy. - //@@ - string version = 2; -} - -//@@ -//@@.. 
cpp:var:: message ModelMetadataResponse -//@@ -//@@ Response message for ModelMetadata. -//@@ -message ModelMetadataResponse -{ - //@@ - //@@ .. cpp:var:: message TensorMetadata - //@@ - //@@ Metadata for a tensor. - //@@ - message TensorMetadata - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string datatype - //@@ - //@@ The tensor data type. - //@@ - string datatype = 2; - - //@@ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The tensor shape. A variable-size dimension is represented - //@@ by a -1 value. - //@@ - repeated int64 shape = 3; - } - - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The model name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string versions (repeated) - //@@ - //@@ The versions of the model. - //@@ - repeated string versions = 2; - - //@@ - //@@ .. cpp:var:: string platform - //@@ - //@@ The model's platform. - //@@ - string platform = 3; - - //@@ - //@@ .. cpp:var:: TensorMetadata inputs (repeated) - //@@ - //@@ The model's inputs. - //@@ - repeated TensorMetadata inputs = 4; - - //@@ - //@@ .. cpp:var:: TensorMetadata outputs (repeated) - //@@ - //@@ The model's outputs. - //@@ - repeated TensorMetadata outputs = 5; -} - -//@@ -//@@.. cpp:var:: message InferParameter -//@@ -//@@ An inference parameter value. -//@@ -message InferParameter -{ - //@@ .. cpp:var:: oneof parameter_choice - //@@ - //@@ The parameter value can be a string, an int64 or - //@@ a boolean - //@@ - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: int64 int64_param - //@@ - //@@ An int64 parameter value. - //@@ - int64 int64_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - } -} - -//@@ -//@@.. cpp:var:: message InferTensorContents -//@@ -//@@ The data contained in a tensor represented by the repeated type -//@@ that matches the tensor's data type. Protobuf oneof is not used -//@@ because oneofs cannot contain repeated fields. -//@@ -message InferTensorContents -{ - //@@ - //@@ .. cpp:var:: bool bool_contents (repeated) - //@@ - //@@ Representation for BOOL data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated bool bool_contents = 1; - - //@@ - //@@ .. cpp:var:: int32 int_contents (repeated) - //@@ - //@@ Representation for INT8, INT16, and INT32 data types. The size - //@@ must match what is expected by the tensor's shape. The contents - //@@ must be the flattened, one-dimensional, row-major order of the - //@@ tensor elements. - //@@ - repeated int32 int_contents = 2; - - //@@ - //@@ .. cpp:var:: int64 int64_contents (repeated) - //@@ - //@@ Representation for INT64 data types. The size must match what - //@@ is expected by the tensor's shape. The contents must be the - //@@ flattened, one-dimensional, row-major order of the tensor elements. - //@@ - repeated int64 int64_contents = 3; - - //@@ - //@@ .. cpp:var:: uint32 uint_contents (repeated) - //@@ - //@@ Representation for UINT8, UINT16, and UINT32 data types. The size - //@@ must match what is expected by the tensor's shape. The contents - //@@ must be the flattened, one-dimensional, row-major order of the - //@@ tensor elements. - //@@ - repeated uint32 uint_contents = 4; - - //@@ - //@@ .. 
cpp:var:: uint64 uint64_contents (repeated) - //@@ - //@@ Representation for UINT64 data types. The size must match what - //@@ is expected by the tensor's shape. The contents must be the - //@@ flattened, one-dimensional, row-major order of the tensor elements. - //@@ - repeated uint64 uint64_contents = 5; - - //@@ - //@@ .. cpp:var:: float fp32_contents (repeated) - //@@ - //@@ Representation for FP32 data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated float fp32_contents = 6; - - //@@ - //@@ .. cpp:var:: double fp64_contents (repeated) - //@@ - //@@ Representation for FP64 data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated double fp64_contents = 7; - - //@@ - //@@ .. cpp:var:: bytes bytes_contents (repeated) - //@@ - //@@ Representation for BYTES data type. The size must match what is - //@@ expected by the tensor's shape. The contents must be the flattened, - //@@ one-dimensional, row-major order of the tensor elements. - //@@ - repeated bytes bytes_contents = 8; -} - -//@@ -//@@.. cpp:var:: message ModelInferRequest -//@@ -//@@ Request message for ModelInfer. -//@@ -message ModelInferRequest -{ - //@@ - //@@ .. cpp:var:: message InferInputTensor - //@@ - //@@ An input tensor for an inference request. - //@@ - message InferInputTensor - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string datatype - //@@ - //@@ The tensor data type. - //@@ - string datatype = 2; - - //@@ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The tensor shape. - //@@ - repeated int64 shape = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional inference input tensor parameters. - //@@ - map parameters = 4; - - //@@ .. cpp:var:: InferTensorContents contents - //@@ - //@@ The tensor contents using a data-type format. This field - //@@ must not be specified if tensor contents are being specified - //@@ in ModelInferRequest.raw_input_contents. - //@@ - InferTensorContents contents = 5; - } - - //@@ - //@@ .. cpp:var:: message InferRequestedOutputTensor - //@@ - //@@ An output tensor requested for an inference request. - //@@ - message InferRequestedOutputTensor - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional requested output tensor parameters. - //@@ - map parameters = 2; - } - - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model to use for inferencing. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: string model_version - //@@ - //@@ The version of the model to use for inference. If not - //@@ given the latest/most-recent version of the model is used. - //@@ - string model_version = 2; - - //@@ .. cpp:var:: string id - //@@ - //@@ Optional identifier for the request. If specified will be - //@@ returned in the response. - //@@ - string id = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional inference parameters. - //@@ - map parameters = 4; - - //@@ - //@@ .. cpp:var:: InferInputTensor inputs (repeated) - //@@ - //@@ The input tensors for the inference. - //@@ - repeated InferInputTensor inputs = 5; - - //@@ - //@@ .. 
cpp:var:: InferRequestedOutputTensor outputs (repeated) - //@@ - //@@ The requested output tensors for the inference. Optional, if not - //@@ specified all outputs specified in the model config will be - //@@ returned. - //@@ - repeated InferRequestedOutputTensor outputs = 6; - - //@@ - //@@ .. cpp:var:: bytes raw_input_contents - //@@ - //@@ The data contained in an input tensor can be represented in - //@@ "raw" bytes form or in the repeated type that matches the - //@@ tensor's data type. Using the "raw" bytes form will - //@@ typically allow higher performance due to the way protobuf - //@@ allocation and reuse interacts with GRPC. For example, see - //@@ https://github.com/grpc/grpc/issues/23231. - //@@ - //@@ To use the raw representation 'raw_input_contents' must be - //@@ initialized with data for each tensor in the same order as - //@@ 'inputs'. For each tensor, the size of this content must - //@@ match what is expected by the tensor's shape and data - //@@ type. The raw data must be the flattened, one-dimensional, - //@@ row-major order of the tensor elements without any stride - //@@ or padding between the elements. Note that the FP16 and BF16 data - //@@ types must be represented as raw content as there is no - //@@ specific data type for a 16-bit float type. - //@@ - //@@ If this field is specified then InferInputTensor::contents - //@@ must not be specified for any input tensor. - //@@ - repeated bytes raw_input_contents = 7; -} - -//@@ -//@@.. cpp:var:: message ModelInferResponse -//@@ -//@@ Response message for ModelInfer. -//@@ -message ModelInferResponse -{ - //@@ - //@@ .. cpp:var:: message InferOutputTensor - //@@ - //@@ An output tensor returned for an inference request. - //@@ - message InferOutputTensor - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The tensor name. - //@@ - string name = 1; - - //@@ - //@@ .. cpp:var:: string datatype - //@@ - //@@ The tensor data type. - //@@ - string datatype = 2; - - //@@ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The tensor shape. - //@@ - repeated int64 shape = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional output tensor parameters. - //@@ - map parameters = 4; - - //@@ .. cpp:var:: InferTensorContents contents - //@@ - //@@ The tensor contents using a data-type format. This field - //@@ must not be specified if tensor contents are being specified - //@@ in ModelInferResponse.raw_output_contents. - //@@ - InferTensorContents contents = 5; - } - - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model used for inference. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: string model_version - //@@ - //@@ The version of the model used for inference. - //@@ - string model_version = 2; - - //@@ .. cpp:var:: string id - //@@ - //@@ The id of the inference request if one was specified. - //@@ - string id = 3; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional inference response parameters. - //@@ - map parameters = 4; - - //@@ - //@@ .. cpp:var:: InferOutputTensor outputs (repeated) - //@@ - //@@ The output tensors holding inference results. - //@@ - repeated InferOutputTensor outputs = 5; - - //@@ - //@@ .. cpp:var:: bytes raw_output_contents - //@@ - //@@ The data contained in an output tensor can be represented in - //@@ "raw" bytes form or in the repeated type that matches the - //@@ tensor's data type. Using the "raw" bytes form will - //@@ typically allow higher performance due to the way protobuf - //@@ allocation and reuse interacts with GRPC. 
For example, see - //@@ https://github.com/grpc/grpc/issues/23231. - //@@ - //@@ To use the raw representation 'raw_output_contents' must be - //@@ initialized with data for each tensor in the same order as - //@@ 'outputs'. For each tensor, the size of this content must - //@@ match what is expected by the tensor's shape and data - //@@ type. The raw data must be the flattened, one-dimensional, - //@@ row-major order of the tensor elements without any stride - //@@ or padding between the elements. Note that the FP16 and BF16 data - //@@ types must be represented as raw content as there is no - //@@ specific data type for a 16-bit float type. - //@@ - //@@ If this field is specified then InferOutputTensor::contents - //@@ must not be specified for any output tensor. - //@@ - repeated bytes raw_output_contents = 6; -} - -//@@ -//@@.. cpp:var:: message ModelStreamInferResponse -//@@ -//@@ Response message for ModelStreamInfer. -//@@ -message ModelStreamInferResponse -{ - //@@ - //@@ .. cpp:var:: string error_message - //@@ - //@@ The message describing the error. The empty message - //@@ indicates the inference was successful without errors. - //@@ - string error_message = 1; - - //@@ - //@@ .. cpp:var:: ModelInferResponse infer_response - //@@ - //@@ Holds the results of the request. - //@@ - ModelInferResponse infer_response = 2; -} - -//@@ -//@@.. cpp:var:: message ModelConfigRequest -//@@ -//@@ Request message for ModelConfig. -//@@ -message ModelConfigRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. If not given the model version - //@@ is selected automatically based on the version policy. - //@@ - string version = 2; -} - -//@@ -//@@.. cpp:var:: message ModelConfigResponse -//@@ -//@@ Response message for ModelConfig. -//@@ -message ModelConfigResponse -{ - //@@ - //@@ .. cpp:var:: ModelConfig config - //@@ - //@@ The model configuration. - //@@ - ModelConfig config = 1; -} - -//@@ -//@@.. cpp:var:: message ModelStatisticsRequest -//@@ -//@@ Request message for ModelStatistics. -//@@ -message ModelStatisticsRequest -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. If not given returns statistics for - //@@ all models. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. If not given returns statistics for - //@@ all model versions. - //@@ - string version = 2; -} - - -//@@ -//@@.. cpp:var:: message StatisticDuration -//@@ -//@@ Statistic recording a cumulative duration metric. -//@@ -message StatisticDuration -{ - //@@ .. cpp:var:: uint64 count - //@@ - //@@ Cumulative number of times this metric occurred. - //@@ - uint64 count = 1; - - //@@ .. cpp:var:: uint64 total_time_ns - //@@ - //@@ Total collected duration of this metric in nanoseconds. - //@@ - uint64 ns = 2; -} - -//@@ -//@@.. cpp:var:: message InferStatistics -//@@ -//@@ Inference statistics. -//@@ -message InferStatistics -{ - //@@ .. cpp:var:: StatisticDuration success - //@@ - //@@ Cumulative count and duration for successful inference - //@@ request. The "success" count and cumulative duration includes - //@@ cache hits. - //@@ - StatisticDuration success = 1; - - //@@ .. cpp:var:: StatisticDuration fail - //@@ - //@@ Cumulative count and duration for failed inference - //@@ request. - //@@ - StatisticDuration fail = 2; - - //@@ .. 
cpp:var:: StatisticDuration queue - //@@ - //@@ The count and cumulative duration that inference requests wait in - //@@ scheduling or other queues. The "queue" count and cumulative - //@@ duration includes cache hits. - //@@ - StatisticDuration queue = 3; - - //@@ .. cpp:var:: StatisticDuration compute_input - //@@ - //@@ The count and cumulative duration to prepare input tensor data as - //@@ required by the model framework / backend. For example, this duration - //@@ should include the time to copy input tensor data to the GPU. - //@@ The "compute_input" count and cumulative duration do not account for - //@@ requests that were a cache hit. See the "cache_hit" field for more - //@@ info. - //@@ - StatisticDuration compute_input = 4; - - //@@ .. cpp:var:: StatisticDuration compute_infer - //@@ - //@@ The count and cumulative duration to execute the model. - //@@ The "compute_infer" count and cumulative duration do not account for - //@@ requests that were a cache hit. See the "cache_hit" field for more - //@@ info. - //@@ - StatisticDuration compute_infer = 5; - - //@@ .. cpp:var:: StatisticDuration compute_output - //@@ - //@@ The count and cumulative duration to extract output tensor data - //@@ produced by the model framework / backend. For example, this duration - //@@ should include the time to copy output tensor data from the GPU. - //@@ The "compute_output" count and cumulative duration do not account for - //@@ requests that were a cache hit. See the "cache_hit" field for more - //@@ info. - //@@ - StatisticDuration compute_output = 6; - - //@@ .. cpp:var:: StatisticDuration cache_hit - //@@ - //@@ The count of response cache hits and cumulative duration to lookup - //@@ and extract output tensor data from the Response Cache on a cache - //@@ hit. For example, this duration should include the time to copy - //@@ output tensor data from the Response Cache to the response object. - //@@ On cache hits, triton does not need to go to the model/backend - //@@ for the output tensor data, so the "compute_input", "compute_infer", - //@@ and "compute_output" fields are not updated. Assuming the response - //@@ cache is enabled for a given model, a cache hit occurs for a - //@@ request to that model when the request metadata (model name, - //@@ model version, model inputs) hashes to an existing entry in the - //@@ cache. On a cache miss, the request hash and response output tensor - //@@ data is added to the cache. See response cache docs for more info: - //@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md - //@@ - StatisticDuration cache_hit = 7; - - //@@ .. cpp:var:: StatisticDuration cache_miss - //@@ - //@@ The count of response cache misses and cumulative duration to lookup - //@@ and insert output tensor data from the computed response to the cache. - //@@ For example, this duration should include the time to copy - //@@ output tensor data from the response object to the Response Cache. - //@@ Assuming the response cache is enabled for a given model, a cache - //@@ miss occurs for a request to that model when the request metadata - //@@ does NOT hash to an existing entry in the cache. See the response - //@@ cache docs for more info: - //@@ https://github.com/triton-inference-server/server/blob/main/docs/response_cache.md - //@@ - StatisticDuration cache_miss = 8; -} - -//@@ -//@@.. cpp:var:: message InferBatchStatistics -//@@ -//@@ Inference batch statistics. -//@@ -message InferBatchStatistics -{ - //@@ .. 
cpp:var:: uint64 batch_size - //@@ - //@@ The size of the batch. - //@@ - uint64 batch_size = 1; - - //@@ .. cpp:var:: StatisticDuration compute_input - //@@ - //@@ The count and cumulative duration to prepare input tensor data as - //@@ required by the model framework / backend with the given batch size. - //@@ For example, this duration should include the time to copy input - //@@ tensor data to the GPU. - //@@ - StatisticDuration compute_input = 2; - - //@@ .. cpp:var:: StatisticDuration compute_infer - //@@ - //@@ The count and cumulative duration to execute the model with the given - //@@ batch size. - //@@ - StatisticDuration compute_infer = 3; - - //@@ .. cpp:var:: StatisticDuration compute_output - //@@ - //@@ The count and cumulative duration to extract output tensor data - //@@ produced by the model framework / backend with the given batch size. - //@@ For example, this duration should include the time to copy output - //@@ tensor data from the GPU. - //@@ - StatisticDuration compute_output = 4; -} - -//@@ -//@@.. cpp:var:: message ModelStatistics -//@@ -//@@ Statistics for a specific model and version. -//@@ -message ModelStatistics -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. If not given returns statistics for all models. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. - //@@ - string version = 2; - - //@@ .. cpp:var:: uint64 last_inference - //@@ - //@@ The timestamp of the last inference request made for this model, - //@@ as milliseconds since the epoch. - //@@ - uint64 last_inference = 3; - - //@@ .. cpp:var:: uint64 inference_count - //@@ - //@@ The cumulative count of successful inference requests made for this - //@@ model. Each inference in a batched request is counted as an - //@@ individual inference. For example, if a client sends a single - //@@ inference request with batch size 64, "inference_count" will be - //@@ incremented by 64. Similarly, if a client sends 64 individual - //@@ requests each with batch size 1, "inference_count" will be - //@@ incremented by 64. The "inference_count" value DOES NOT include - //@@ cache hits. - //@@ - uint64 inference_count = 4; - - //@@ .. cpp:var:: uint64 execution_count - //@@ - //@@ The cumulative count of the number of successful inference executions - //@@ performed for the model. When dynamic batching is enabled, a single - //@@ model execution can perform inferencing for more than one inference - //@@ request. For example, if a client sends 64 individual requests each - //@@ with batch size 1 and the dynamic batcher batches them into a single - //@@ large batch for model execution then "execution_count" will be - //@@ incremented by 1. If, on the other hand, the dynamic batcher is not - //@@ enabled for that model then each of the 64 individual requests is - //@@ executed independently and "execution_count" will be incremented - //@@ by 64. The "execution_count" value DOES NOT include cache hits. - //@@ - uint64 execution_count = 5; - - //@@ .. cpp:var:: InferStatistics inference_stats - //@@ - //@@ The aggregate statistics for the model/version. - //@@ - InferStatistics inference_stats = 6; - - //@@ .. cpp:var:: InferBatchStatistics batch_stats (repeated) - //@@ - //@@ The aggregate statistics for each different batch size that is - //@@ executed in the model.
The batch statistics indicate how many actual - //@@ model executions were performed and show differences due to different - //@@ batch size (for example, larger batches typically take longer to - //@@ compute). - //@@ - repeated InferBatchStatistics batch_stats = 7; -} - -//@@ -//@@.. cpp:var:: message ModelStatisticsResponse -//@@ -//@@ Response message for ModelStatistics. -//@@ -message ModelStatisticsResponse -{ - //@@ .. cpp:var:: ModelStatistics model_stats (repeated) - //@@ - //@@ Statistics for each requested model. - //@@ - repeated ModelStatistics model_stats = 1; -} - -//@@ -//@@.. cpp:var:: message ModelRepositoryParameter -//@@ -//@@ An model repository parameter value. -//@@ -message ModelRepositoryParameter -{ - //@@ .. cpp:var:: oneof parameter_choice - //@@ - //@@ The parameter value can be a string, an int64 or - //@@ a boolean - //@@ - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: int64 int64_param - //@@ - //@@ An int64 parameter value. - //@@ - int64 int64_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - - //@@ .. cpp:var:: bytes bytes_param - //@@ - //@@ A bytes parameter value. - //@@ - bytes bytes_param = 4; - } -} - -//@@ -//@@.. cpp:var:: message RepositoryIndexRequest -//@@ -//@@ Request message for RepositoryIndex. -//@@ -message RepositoryIndexRequest -{ - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the repository. If empty the index is returned - //@@ for all repositories. - //@@ - string repository_name = 1; - - //@@ .. cpp:var:: bool ready - //@@ - //@@ If true returned only models currently ready for inferencing. - //@@ - bool ready = 2; -} - -//@@ -//@@.. cpp:var:: message RepositoryIndexResponse -//@@ -//@@ Response message for RepositoryIndex. -//@@ -message RepositoryIndexResponse -{ - //@@ - //@@ .. cpp:var:: message ModelIndex - //@@ - //@@ Index entry for a model. - //@@ - message ModelIndex - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string version - //@@ - //@@ The version of the model. - //@@ - string version = 2; - - //@@ - //@@ .. cpp:var:: string state - //@@ - //@@ The state of the model. - //@@ - string state = 3; - - //@@ - //@@ .. cpp:var:: string reason - //@@ - //@@ The reason, if any, that the model is in the given state. - //@@ - string reason = 4; - } - - //@@ - //@@ .. cpp:var:: ModelIndex models (repeated) - //@@ - //@@ An index entry for each model. - //@@ - repeated ModelIndex models = 1; -} - -//@@ -//@@.. cpp:var:: message RepositoryModelLoadRequest -//@@ -//@@ Request message for RepositoryModelLoad. -//@@ -message RepositoryModelLoadRequest -{ - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the repository to load from. If empty the model - //@@ is loaded from any repository. - //@@ - string repository_name = 1; - - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the model to load, or reload. - //@@ - string model_name = 2; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model repository request parameters. - //@@ - map parameters = 3; -} - -//@@ -//@@.. cpp:var:: message RepositoryModelLoadResponse -//@@ -//@@ Response message for RepositoryModelLoad. -//@@ -message RepositoryModelLoadResponse {} - -//@@ -//@@.. 
cpp:var:: message RepositoryModelUnloadRequest -//@@ -//@@ Request message for RepositoryModelUnload. -//@@ -message RepositoryModelUnloadRequest -{ - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the repository from which the model was originally - //@@ loaded. If empty the repository is not considered. - //@@ - string repository_name = 1; - - //@@ .. cpp:var:: string repository_name - //@@ - //@@ The name of the model to unload. - //@@ - string model_name = 2; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model repository request parameters. - //@@ - map parameters = 3; -} - -//@@ -//@@.. cpp:var:: message RepositoryModelUnloadResponse -//@@ -//@@ Response message for RepositoryModelUnload. -//@@ -message RepositoryModelUnloadResponse {} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryStatusRequest -//@@ -//@@ Request message for SystemSharedMemoryStatus. -//@@ -message SystemSharedMemoryStatusRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to get status for. If empty the - //@@ status is returned for all registered regions. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryStatusResponse -//@@ -//@@ Response message for SystemSharedMemoryStatus. -//@@ -message SystemSharedMemoryStatusResponse -{ - //@@ - //@@ .. cpp:var:: message RegionStatus - //@@ - //@@ Status for a shared memory region. - //@@ - message RegionStatus - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name for the shared memory region. - //@@ - string name = 1; - - //@@ .. cpp:var:: string shared_memory_key - //@@ - //@@ The key of the underlying memory object that contains the - //@@ shared memory region. - //@@ - string key = 2; - - //@@ .. cpp:var:: uint64 offset - //@@ - //@@ Offset, in bytes, within the underlying memory object to - //@@ the start of the shared memory region. - //@@ - uint64 offset = 3; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory region, in bytes. - //@@ - uint64 byte_size = 4; - } - - //@@ - //@@ .. cpp:var:: map regions - //@@ - //@@ Status for each of the registered regions, indexed by - //@@ region name. - //@@ - map regions = 1; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryRegisterRequest -//@@ -//@@ Request message for SystemSharedMemoryRegister. -//@@ -message SystemSharedMemoryRegisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to register. - //@@ - string name = 1; - - //@@ .. cpp:var:: string shared_memory_key - //@@ - //@@ The key of the underlying memory object that contains the - //@@ shared memory region. - //@@ - string key = 2; - - //@@ .. cpp:var:: uint64 offset - //@@ - //@@ Offset, in bytes, within the underlying memory object to - //@@ the start of the shared memory region. - //@@ - uint64 offset = 3; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory region, in bytes. - //@@ - uint64 byte_size = 4; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryRegisterResponse -//@@ -//@@ Response message for SystemSharedMemoryRegister. -//@@ -message SystemSharedMemoryRegisterResponse {} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryUnregisterRequest -//@@ -//@@ Request message for SystemSharedMemoryUnregister. -//@@ -message SystemSharedMemoryUnregisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the system region to unregister. If empty - //@@ all system shared-memory regions are unregistered. 
- //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message SystemSharedMemoryUnregisterResponse -//@@ -//@@ Response message for SystemSharedMemoryUnregister. -//@@ -message SystemSharedMemoryUnregisterResponse {} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryStatusRequest -//@@ -//@@ Request message for CudaSharedMemoryStatus. -//@@ -message CudaSharedMemoryStatusRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to get status for. If empty the - //@@ status is returned for all registered regions. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryStatusResponse -//@@ -//@@ Response message for CudaSharedMemoryStatus. -//@@ -message CudaSharedMemoryStatusResponse -{ - //@@ - //@@ .. cpp:var:: message RegionStatus - //@@ - //@@ Status for a shared memory region. - //@@ - message RegionStatus - { - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name for the shared memory region. - //@@ - string name = 1; - - //@@ .. cpp:var:: uin64 device_id - //@@ - //@@ The GPU device ID where the cudaIPC handle was created. - //@@ - uint64 device_id = 2; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory region, in bytes. - //@@ - uint64 byte_size = 3; - } - - //@@ - //@@ .. cpp:var:: map regions - //@@ - //@@ Status for each of the registered regions, indexed by - //@@ region name. - //@@ - map regions = 1; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryRegisterRequest -//@@ -//@@ Request message for CudaSharedMemoryRegister. -//@@ -message CudaSharedMemoryRegisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the region to register. - //@@ - string name = 1; - - //@@ .. cpp:var:: bytes raw_handle - //@@ - //@@ The raw serialized cudaIPC handle. - //@@ - bytes raw_handle = 2; - - //@@ .. cpp:var:: int64 device_id - //@@ - //@@ The GPU device ID on which the cudaIPC handle was created. - //@@ - int64 device_id = 3; - - //@@ .. cpp:var:: uint64 byte_size - //@@ - //@@ Size of the shared memory block, in bytes. - //@@ - uint64 byte_size = 4; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryRegisterResponse -//@@ -//@@ Response message for CudaSharedMemoryRegister. -//@@ -message CudaSharedMemoryRegisterResponse {} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryUnregisterRequest -//@@ -//@@ Request message for CudaSharedMemoryUnregister. -//@@ -message CudaSharedMemoryUnregisterRequest -{ - //@@ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the cuda region to unregister. If empty - //@@ all cuda shared-memory regions are unregistered. - //@@ - string name = 1; -} - -//@@ -//@@.. cpp:var:: message CudaSharedMemoryUnregisterResponse -//@@ -//@@ Response message for CudaSharedMemoryUnregister. -//@@ -message CudaSharedMemoryUnregisterResponse {} - -//@@ -//@@.. cpp:var:: message TraceSettingRequest -//@@ -//@@ Request message for TraceSetting. -//@@ -message TraceSettingRequest -{ - //@@ - //@@ .. cpp:var:: message SettingValue - //@@ - //@@ The values to be associated with a trace setting. - //@@ If no value is provided, the setting will be clear and - //@@ the global setting value will be used. - //@@ - message SettingValue - { - //@@ - //@@ .. cpp:var:: string value (repeated) - //@@ - //@@ The value. - //@@ - repeated string value = 1; - } - - //@@ .. cpp:var:: map settings - //@@ - //@@ The new setting values to be updated, - //@@ settings that are not specified will remain unchanged. - //@@ - map settings = 1; - - //@@ - //@@ .. 
cpp:var:: string model_name - //@@ - //@@ The name of the model to apply the new trace settings. - //@@ If not given, the new settings will be applied globally. - //@@ - string model_name = 2; -} - -//@@ -//@@.. cpp:var:: message TraceSettingResponse -//@@ -//@@ Response message for TraceSetting. -//@@ -message TraceSettingResponse -{ - //@@ - //@@ .. cpp:var:: message SettingValue - //@@ - //@@ The values to be associated with a trace setting. - //@@ - message SettingValue - { - //@@ - //@@ .. cpp:var:: string value (repeated) - //@@ - //@@ The value. - //@@ - repeated string value = 1; - } - - //@@ .. cpp:var:: map settings - //@@ - //@@ The current trace settings, including any changes specified - //@@ by TraceSettingRequest. - //@@ - map settings = 1; -} - -//@@ -//@@.. cpp:var:: message LogSettingsRequest -//@@ -//@@ Request message for LogSettings. -//@@ -message LogSettingsRequest -{ - message SettingValue - { - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: uint32 uint32_param - //@@ - //@@ An uint32 parameter value. - //@@ - uint32 uint32_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - } - } - //@@ .. cpp:var:: map settings - //@@ - //@@ The current log settings. - //@@ - map settings = 1; -} - -//@@ -//@@.. cpp:var:: message LogSettingsResponse -//@@ -//@@ Response message for LogSettings. -//@@ -message LogSettingsResponse -{ - message SettingValue - { - oneof parameter_choice - { - //@@ .. cpp:var:: bool bool_param - //@@ - //@@ A boolean parameter value. - //@@ - bool bool_param = 1; - - //@@ .. cpp:var:: uint32 uint32_param - //@@ - //@@ An int32 parameter value. - //@@ - uint32 uint32_param = 2; - - //@@ .. cpp:var:: string string_param - //@@ - //@@ A string parameter value. - //@@ - string string_param = 3; - } - } - //@@ .. cpp:var:: map settings - //@@ - //@@ The current log settings. - //@@ - map settings = 1; -} - diff --git a/3rdparty/common-r22.12/protobuf/model_config.proto b/3rdparty/common-r22.12/protobuf/model_config.proto deleted file mode 100644 index b3c5a7b25c792e88e5cc8564d47dd6c048674e46..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/protobuf/model_config.proto +++ /dev/null @@ -1,1981 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Copyright (c) 2018, TensorFlow Authors. All rights reserved. - -syntax = "proto3"; - -package inference; - -//@@.. cpp:namespace:: inference - -//@@ -//@@.. cpp:enum:: DataType -//@@ -//@@ Data types supported for input and output tensors. -//@@ -enum DataType { - //@@ .. cpp:enumerator:: DataType::INVALID = 0 - TYPE_INVALID = 0; - - //@@ .. cpp:enumerator:: DataType::BOOL = 1 - TYPE_BOOL = 1; - - //@@ .. cpp:enumerator:: DataType::UINT8 = 2 - TYPE_UINT8 = 2; - //@@ .. cpp:enumerator:: DataType::UINT16 = 3 - TYPE_UINT16 = 3; - //@@ .. cpp:enumerator:: DataType::UINT32 = 4 - TYPE_UINT32 = 4; - //@@ .. cpp:enumerator:: DataType::UINT64 = 5 - TYPE_UINT64 = 5; - - //@@ .. cpp:enumerator:: DataType::INT8 = 6 - TYPE_INT8 = 6; - //@@ .. cpp:enumerator:: DataType::INT16 = 7 - TYPE_INT16 = 7; - //@@ .. cpp:enumerator:: DataType::INT32 = 8 - TYPE_INT32 = 8; - //@@ .. cpp:enumerator:: DataType::INT64 = 9 - TYPE_INT64 = 9; - - //@@ .. cpp:enumerator:: DataType::FP16 = 10 - TYPE_FP16 = 10; - //@@ .. cpp:enumerator:: DataType::FP32 = 11 - TYPE_FP32 = 11; - //@@ .. cpp:enumerator:: DataType::FP64 = 12 - TYPE_FP64 = 12; - - //@@ .. cpp:enumerator:: DataType::STRING = 13 - TYPE_STRING = 13; - - //@@ .. cpp:enumerator:: DataType::BF16 = 14 - TYPE_BF16 = 14; -} - -//@@ -//@@ .. cpp:var:: message ModelRateLimiter -//@@ -//@@ The specifications required by the rate limiter to properly -//@@ schedule the inference requests across the different models -//@@ and their instances. -//@@ -message ModelRateLimiter -{ - //@@ .. cpp:var:: message Resource - //@@ - //@@ The resource property. - //@@ - message Resource - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name associated with the resource. - //@@ - string name = 1; - - //@@ .. cpp:var:: bool global - //@@ - //@@ Whether or not the resource is global. If true then the resource - //@@ is assumed to be shared among the devices otherwise specified - //@@ count of the resource is assumed for each device associated - //@@ with the instance. - //@@ - bool global = 2; - - //@@ .. cpp:var:: uint32 count - //@@ - //@@ The number of resources required for the execution of the model - //@@ instance. - //@@ - uint32 count = 3; - } - - //@@ .. cpp:var:: Resource resources (repeated) - //@@ - //@@ The resources required to execute the request on a model instance. - //@@ Resources are just names with a corresponding count. The execution - //@@ of the instance will be blocked until the specificied resources are - //@@ available. By default an instance uses no rate-limiter resources. - //@@ - repeated Resource resources = 1; - - //@@ .. cpp:var:: uint32 priority - //@@ - //@@ The optional weighting value to be used for prioritizing across - //@@ instances. An instance with priority 2 will be given 1/2 the - //@@ number of scheduling chances as an instance_group with priority - //@@ 1. The default priority is 1. The priority of value 0 will be - //@@ treated as priority 1. - //@@ - uint32 priority = 2; -} - -//@@ -//@@.. 
cpp:var:: message ModelInstanceGroup -//@@ -//@@ A group of one or more instances of a model and resources made -//@@ available for those instances. -//@@ -message ModelInstanceGroup -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ Kind of this instance group. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::KIND_AUTO = 0 - //@@ - //@@ This instance group represents instances that can run on either - //@@ CPU or GPU. If all GPUs listed in 'gpus' are available then - //@@ instances will be created on GPU(s), otherwise instances will - //@@ be created on CPU. - //@@ - KIND_AUTO = 0; - - //@@ .. cpp:enumerator:: Kind::KIND_GPU = 1 - //@@ - //@@ This instance group represents instances that must run on the - //@@ GPU. - //@@ - KIND_GPU = 1; - - //@@ .. cpp:enumerator:: Kind::KIND_CPU = 2 - //@@ - //@@ This instance group represents instances that must run on the - //@@ CPU. - //@@ - KIND_CPU = 2; - - //@@ .. cpp:enumerator:: Kind::KIND_MODEL = 3 - //@@ - //@@ This instance group represents instances that should run on the - //@@ CPU and/or GPU(s) as specified by the model or backend itself. - //@@ The inference server will not override the model/backend - //@@ settings. - //@@ - KIND_MODEL = 3; - } - - //@@ - //@@ .. cpp:var:: message SecondaryDevice - //@@ - //@@ A secondary device required for a model instance. - //@@ - message SecondaryDevice - { - //@@ - //@@ .. cpp:enum:: SecondaryDeviceKind - //@@ - //@@ The kind of the secondary device. - //@@ - enum SecondaryDeviceKind { - //@@ .. cpp:enumerator:: SecondaryDeviceKind::KIND_NVDLA = 0 - //@@ - //@@ An NVDLA core. http://nvdla.org - //@@ Currently KIND_NVDLA is only supported by the TensorRT backend. - //@@ - KIND_NVDLA = 0; - } - - //@@ .. cpp:var:: SecondaryDeviceKind kind - //@@ - //@@ The secondary device kind. - //@@ - SecondaryDeviceKind kind = 1; - - //@@ .. cpp:var:: int64 device_id - //@@ - //@@ Identifier for the secondary device. - //@@ - int64 device_id = 2; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ Optional name of this group of instances. If not specified the - //@@ name will be formed as _. The name of - //@@ individual instances will be further formed by a unique instance - //@@ number and GPU index: - //@@ - string name = 1; - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this instance group. Default is KIND_AUTO. If - //@@ KIND_AUTO or KIND_GPU then both 'count' and 'gpu' are valid and - //@@ may be specified. If KIND_CPU or KIND_MODEL only 'count' is valid - //@@ and 'gpu' cannot be specified. - //@@ - Kind kind = 4; - - //@@ .. cpp:var:: int32 count - //@@ - //@@ For a group assigned to GPU, the number of instances created for - //@@ each GPU listed in 'gpus'. For a group assigned to CPU the number - //@@ of instances created. Default is 1. - int32 count = 2; - - //@@ .. cpp:var:: ModelRateLimiter rate_limiter - //@@ - //@@ The rate limiter specific settings to be associated with this - //@@ instance group. Optional, if not specified no rate limiting - //@@ will be applied to this instance group. - //@@ - ModelRateLimiter rate_limiter = 6; - - //@@ .. cpp:var:: int32 gpus (repeated) - //@@ - //@@ GPU(s) where instances should be available. For each GPU listed, - //@@ 'count' instances of the model will be available. Setting 'gpus' - //@@ to empty (or not specifying at all) is eqivalent to listing all - //@@ available GPUs. - //@@ - repeated int32 gpus = 3; - - //@@ .. 
cpp:var:: SecondaryDevice secondary_devices (repeated) - //@@ - //@@ Secondary devices that are required by instances specified by this - //@@ instance group. Optional. - //@@ - repeated SecondaryDevice secondary_devices = 8; - - //@@ .. cpp:var:: string profile (repeated) - //@@ - //@@ For TensorRT models containing multiple optimization profile, this - //@@ parameter specifies a set of optimization profiles available to this - //@@ instance group. The inference server will choose the optimal profile - //@@ based on the shapes of the input tensors. This field should lie - //@@ between 0 and - 1 - //@@ and be specified only for TensorRT backend, otherwise an error will - //@@ be generated. If not specified, the server will select the first - //@@ optimization profile by default. - //@@ - repeated string profile = 5; - - //@@ .. cpp:var:: bool passive - //@@ - //@@ Whether the instances within this instance group will be accepting - //@@ inference requests from the scheduler. If true, the instances will - //@@ not be added to the scheduler. Default value is false. - //@@ - bool passive = 7; - - //@@ .. cpp:var:: string host_policy - //@@ - //@@ The host policy name that the instance to be associated with. - //@@ The default value is set to reflect the device kind of the instance, - //@@ for instance, KIND_CPU is "cpu", KIND_MODEL is "model" and - //@@ KIND_GPU is "gpu_". - //@@ - string host_policy = 9; -} - -//@@ -//@@.. cpp:var:: message ModelTensorReshape -//@@ -//@@ Reshape specification for input and output tensors. -//@@ -message ModelTensorReshape -{ - //@@ .. cpp:var:: int64 shape (repeated) - //@@ - //@@ The shape to use for reshaping. - //@@ - repeated int64 shape = 1; -} - -//@@ -//@@.. cpp:var:: message ModelInput -//@@ -//@@ An input required by the model. -//@@ -message ModelInput -{ - //@@ - //@@ .. cpp:enum:: Format - //@@ - //@@ The format for the input. - //@@ - enum Format { - //@@ .. cpp:enumerator:: Format::FORMAT_NONE = 0 - //@@ - //@@ The input has no specific format. This is the default. - //@@ - FORMAT_NONE = 0; - - //@@ .. cpp:enumerator:: Format::FORMAT_NHWC = 1 - //@@ - //@@ HWC image format. Tensors with this format require 3 dimensions - //@@ if the model does not support batching (max_batch_size = 0) or 4 - //@@ dimensions if the model does support batching (max_batch_size - //@@ >= 1). In either case the 'dims' below should only specify the - //@@ 3 non-batch dimensions (i.e. HWC or CHW). - //@@ - FORMAT_NHWC = 1; - - //@@ .. cpp:enumerator:: Format::FORMAT_NCHW = 2 - //@@ - //@@ CHW image format. Tensors with this format require 3 dimensions - //@@ if the model does not support batching (max_batch_size = 0) or 4 - //@@ dimensions if the model does support batching (max_batch_size - //@@ >= 1). In either case the 'dims' below should only specify the - //@@ 3 non-batch dimensions (i.e. HWC or CHW). - //@@ - FORMAT_NCHW = 2; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the input. - //@@ - string name = 1; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the input. - //@@ - DataType data_type = 2; - - //@@ .. cpp:var:: Format format - //@@ - //@@ The format of the input. Optional. - //@@ - Format format = 3; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The dimensions/shape of the input tensor that must be provided - //@@ when invoking the inference API for this model. - //@@ - repeated int64 dims = 4; - - //@@ .. 
cpp:var:: ModelTensorReshape reshape - //@@ - //@@ The shape expected for this input by the backend. The input will - //@@ be reshaped to this before being presented to the backend. The - //@@ reshape must have the same number of elements as the input shape - //@@ specified by 'dims'. Optional. - //@@ - ModelTensorReshape reshape = 5; - - //@@ .. cpp:var:: bool is_shape_tensor - //@@ - //@@ Whether or not the input is a shape tensor to the model. This field - //@@ is currently supported only for the TensorRT model. An error will be - //@@ generated if this specification does not comply with underlying - //@@ model. - //@@ - bool is_shape_tensor = 6; - - //@@ .. cpp:var:: bool allow_ragged_batch - //@@ - //@@ Whether or not the input is allowed to be "ragged" in a dynamically - //@@ created batch. Default is false indicating that two requests will - //@@ only be batched if this tensor has the same shape in both requests. - //@@ True indicates that two requests can be batched even if this tensor - //@@ has a different shape in each request. - //@@ - bool allow_ragged_batch = 7; - - //@@ .. cpp:var:: bool optional - //@@ - //@@ Whether or not the input is optional for the model execution. - //@@ If true, the input is not required in the inference request. - //@@ Default value is false. - //@@ - bool optional = 8; -} - -//@@ -//@@.. cpp:var:: message ModelOutput -//@@ -//@@ An output produced by the model. -//@@ -message ModelOutput -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the output. - //@@ - string name = 1; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the output. - //@@ - DataType data_type = 2; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The dimensions/shape of the output tensor. - //@@ - repeated int64 dims = 3; - - //@@ .. cpp:var:: ModelTensorReshape reshape - //@@ - //@@ The shape produced for this output by the backend. The output will - //@@ be reshaped from this to the shape specifed in 'dims' before being - //@@ returned in the inference response. The reshape must have the same - //@@ number of elements as the output shape specified by 'dims'. Optional. - //@@ - ModelTensorReshape reshape = 5; - - //@@ .. cpp:var:: string label_filename - //@@ - //@@ The label file associated with this output. Should be specified only - //@@ for outputs that represent classifications. Optional. - //@@ - string label_filename = 4; - - - //@@ .. cpp:var:: bool is_shape_tensor - //@@ - //@@ Whether or not the output is a shape tensor to the model. This field - //@@ is currently supported only for the TensorRT model. An error will be - //@@ generated if this specification does not comply with underlying - //@@ model. - //@@ - bool is_shape_tensor = 6; -} - -//@@ .. cpp:var:: message BatchInput -//@@ -//@@ A batch input is an additional input that must be added by -//@@ the backend based on all the requests in a batch. -//@@ -message BatchInput -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the batch input. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::BATCH_ELEMENT_COUNT = 0 - //@@ - //@@ The element count of the 'source_input' will be added as - //@@ input with shape [1]. - //@@ - BATCH_ELEMENT_COUNT = 0; - - //@@ .. cpp:enumerator:: Kind::BATCH_ACCUMULATED_ELEMENT_COUNT = 1 - //@@ - //@@ The accumulated element count of the 'source_input' will be - //@@ added as input with shape [1]. 
For example, if there is a - //@@ batch of two request, each with 2 elements, an input of value - //@@ 2 will be added to the first request, and an input of value - //@@ 4 will be added to the second request. - //@@ - BATCH_ACCUMULATED_ELEMENT_COUNT = 1; - - //@@ .. cpp:enumerator:: - //@@ Kind::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2 - //@@ - //@@ The accumulated element count of the 'source_input' will be - //@@ added as input with shape [1], except for the first request - //@@ in the batch. For the first request in the batch, the input - //@@ will have shape [2] where the first element is value 0. - //@@ - BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = 2; - - //@@ .. cpp:enumerator:: Kind::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3 - //@@ - //@@ Among the requests in the batch, the max element count of the - //@@ 'source_input' will be added as input with shape - //@@ [max_element_count] for the first request in the batch. - //@@ For other requests, such input will be with shape [0]. - //@@ The data of the tensor will be uninitialized. - //@@ - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = 3; - - //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE = 4 - //@@ - //@@ Among the requests in the batch, the shape of the - //@@ 'source_input' will be added as input with shape - //@@ [batch_size, len(input_dim)]. For example, if one - //@@ batch-2 input with shape [3, 1] and batch-1 input - //@@ with shape [2, 2] are batched, the batch input will - //@@ have shape [3, 2] and value [ [3, 1], [3, 1], [2, 2]]. - //@@ - BATCH_ITEM_SHAPE = 4; - - //@@ .. cpp:enumerator:: Kind::BATCH_ITEM_SHAPE_FLATTEN = 5 - //@@ - //@@ Among the requests in the batch, the shape of the - //@@ 'source_input' will be added as input with single dimensional - //@@ shape [batch_size * len(input_dim)]. For example, if one - //@@ batch-2 input with shape [3, 1] and batch-1 input - //@@ with shape [2, 2] are batched, the batch input will - //@@ have shape [6] and value [3, 1, 3, 1, 2, 2]. - //@@ - BATCH_ITEM_SHAPE_FLATTEN = 5; - } - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this batch input. - //@@ - Kind kind = 1; - - //@@ .. cpp:var:: string target_name (repeated) - //@@ - //@@ The name of the model inputs that the backend will create - //@@ for this batch input. - //@@ - repeated string target_name = 2; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The input's datatype. The data type can be TYPE_INT32 or - //@@ TYPE_FP32. - //@@ - DataType data_type = 3; - - //@@ .. cpp:var:: string source_input (repeated) - //@@ - //@@ The backend derives the value for each batch input from one or - //@@ more other inputs. 'source_input' gives the names of those - //@@ inputs. - //@@ - repeated string source_input = 4; -} - -//@@.. cpp:var:: message BatchOutput -//@@ -//@@ A batch output is an output produced by the model that must be handled -//@@ differently by the backend based on all the requests in a batch. -//@@ -message BatchOutput -{ - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the batch output. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::BATCH_SCATTER_WITH_INPUT_SHAPE = 0 - //@@ - //@@ The output should be scattered according to the shape of - //@@ 'source_input'. The dynamic dimension of the output will - //@@ be set to the value of the same dimension in the input. - //@@ - BATCH_SCATTER_WITH_INPUT_SHAPE = 0; - } - - //@@ .. cpp:var:: string target_name (repeated) - //@@ - //@@ The name of the outputs to be produced by this batch output - //@@ specification. 
- //@@ - repeated string target_name = 1; - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this batch output. - //@@ - Kind kind = 2; - - //@@ .. cpp:var:: string source_input (repeated) - //@@ - //@@ The backend derives each batch output from one or more inputs. - //@@ 'source_input' gives the names of those inputs. - //@@ - repeated string source_input = 3; -} - -//@@ -//@@.. cpp:var:: message ModelVersionPolicy -//@@ -//@@ Policy indicating which versions of a model should be made -//@@ available by the inference server. -//@@ -message ModelVersionPolicy -{ - //@@ .. cpp:var:: message Latest - //@@ - //@@ Serve only the latest version(s) of a model. This is - //@@ the default policy. - //@@ - message Latest - { - //@@ .. cpp:var:: uint32 num_versions - //@@ - //@@ Serve only the 'num_versions' highest-numbered versions. T - //@@ The default value of 'num_versions' is 1, indicating that by - //@@ default only the single highest-number version of a - //@@ model will be served. - //@@ - uint32 num_versions = 1; - } - - //@@ .. cpp:var:: message All - //@@ - //@@ Serve all versions of the model. - //@@ - message All {} - - //@@ .. cpp:var:: message Specific - //@@ - //@@ Serve only specific versions of the model. - //@@ - message Specific - { - //@@ .. cpp:var:: int64 versions (repeated) - //@@ - //@@ The specific versions of the model that will be served. - //@@ - repeated int64 versions = 1; - } - - //@@ .. cpp:var:: oneof policy_choice - //@@ - //@@ Each model must implement only a single version policy. The - //@@ default policy is 'Latest'. - //@@ - oneof policy_choice - { - //@@ .. cpp:var:: Latest latest - //@@ - //@@ Serve only latest version(s) of the model. - //@@ - Latest latest = 1; - - //@@ .. cpp:var:: All all - //@@ - //@@ Serve all versions of the model. - //@@ - All all = 2; - - //@@ .. cpp:var:: Specific specific - //@@ - //@@ Serve only specific version(s) of the model. - //@@ - Specific specific = 3; - } -} - -//@@ -//@@.. cpp:var:: message ModelOptimizationPolicy -//@@ -//@@ Optimization settings for a model. These settings control if/how a -//@@ model is optimized and prioritized by the backend framework when -//@@ it is loaded. -//@@ -message ModelOptimizationPolicy -{ - //@@ - //@@ .. cpp:var:: message Graph - //@@ - //@@ Enable generic graph optimization of the model. If not specified - //@@ the framework's default level of optimization is used. Supports - //@@ TensorFlow graphdef and savedmodel and Onnx models. For TensorFlow - //@@ causes XLA to be enabled/disabled for the model. For Onnx defaults - //@@ to enabling all optimizations, -1 enables only basic optimizations, - //@@ +1 enables only basic and extended optimizations. - //@@ - message Graph - { - //@@ .. cpp:var:: int32 level - //@@ - //@@ The optimization level. Defaults to 0 (zero) if not specified. - //@@ - //@@ - -1: Disabled - //@@ - 0: Framework default - //@@ - 1+: Enable optimization level (greater values indicate - //@@ higher optimization levels) - //@@ - int32 level = 1; - } - - //@@ - //@@ .. cpp:enum:: ModelPriority - //@@ - //@@ Model priorities. A model will be given scheduling and execution - //@@ preference over models at lower priorities. Current model - //@@ priorities only work for TensorRT models. - //@@ - enum ModelPriority { - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_DEFAULT = 0 - //@@ - //@@ The default model priority. - //@@ - PRIORITY_DEFAULT = 0; - - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MAX = 1 - //@@ - //@@ The maximum model priority. 
- //@@ - PRIORITY_MAX = 1; - - //@@ .. cpp:enumerator:: ModelPriority::PRIORITY_MIN = 2 - //@@ - //@@ The minimum model priority. - //@@ - PRIORITY_MIN = 2; - } - - //@@ - //@@ .. cpp:var:: message Cuda - //@@ - //@@ CUDA-specific optimization settings. - //@@ - message Cuda - { - //@@ .. cpp:var:: message GraphSpec - //@@ - //@@ Specification of the CUDA graph to be captured. - //@@ - message GraphSpec - { - //@@ .. cpp:var:: message Dims - //@@ - //@@ Specification of tensor dimension. - //@@ - message Shape - { - //@@ .. cpp:var:: int64 dim (repeated) - //@@ - //@@ The dimension. - //@@ - repeated int64 dim = 1; - } - - message LowerBound - { - //@@ .. cpp:var:: int32 batch_size - //@@ - //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0, - //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must - //@@ be set to value between 1 and 'max_batch_size'. - //@@ - int32 batch_size = 1; - - //@@ .. cpp:var:: map input - //@@ - //@@ The specification of the inputs. 'Shape' is the shape of - //@@ the input without batching dimension. - //@@ - map input = 2; - } - - //@@ .. cpp:var:: int32 batch_size - //@@ - //@@ The batch size of the CUDA graph. If 'max_batch_size' is 0, - //@@ 'batch_size' must be set to 0. Otherwise, 'batch_size' must - //@@ be set to value between 1 and 'max_batch_size'. - //@@ - int32 batch_size = 1; - - //@@ .. cpp:var:: map input - //@@ - //@@ The specification of the inputs. 'Shape' is the shape of the - //@@ input without batching dimension. - //@@ - map input = 2; - - //@@ .. cpp:var:: LowerBound graph_lower_bound - //@@ - //@@ Specify the lower bound of the CUDA graph. Optional. - //@@ If specified, the graph can be used for input shapes and - //@@ batch sizes that are in closed interval between the lower - //@@ bound specification and graph specification. For dynamic - //@@ shape model, this allows CUDA graphs to be launched - //@@ frequently without capturing all possible shape combinations. - //@@ However, using graph for shape combinations different from - //@@ the one used for capturing introduces uninitialized data for - //@@ execution and it may distort the inference result if - //@@ the model is sensitive to uninitialized data. - //@@ - LowerBound graph_lower_bound = 3; - } - - //@@ .. cpp:var:: bool graphs - //@@ - //@@ Use CUDA graphs API to capture model operations and execute - //@@ them more efficiently. Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool graphs = 1; - - //@@ .. cpp:var:: bool busy_wait_events - //@@ - //@@ Use busy-waiting to synchronize CUDA events to achieve minimum - //@@ latency from event complete to host thread to be notified, with - //@@ the cost of high CPU load. Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool busy_wait_events = 2; - - //@@ .. cpp:var:: GraphSpec graph_spec (repeated) - //@@ - //@@ Specification of the CUDA graph to be captured. If not specified - //@@ and 'graphs' is true, the default CUDA graphs will be captured - //@@ based on model settings. - //@@ Currently only recognized by TensorRT backend. - //@@ - repeated GraphSpec graph_spec = 3; - - //@@ .. cpp:var:: bool output_copy_stream - //@@ - //@@ Uses a CUDA stream separate from the inference stream to copy the - //@@ output to host. However, be aware that setting this option to - //@@ true will lead to an increase in the memory consumption of the - //@@ model as Triton will allocate twice as much GPU memory for its - //@@ I/O tensor buffers. 
Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool output_copy_stream = 4; - } - - //@@ - //@@ .. cpp:var:: message ExecutionAccelerators - //@@ - //@@ Specify the preferred execution accelerators to be used to execute - //@@ the model. Currently only recognized by ONNX Runtime backend and - //@@ TensorFlow backend. - //@@ - //@@ For ONNX Runtime backend, it will deploy the model with the execution - //@@ accelerators by priority, the priority is determined based on the - //@@ order that they are set, i.e. the provider at the front has highest - //@@ priority. Overall, the priority will be in the following order: - //@@ (if instance is on GPU) - //@@ CUDA Execution Provider (if instance is on GPU) - //@@ - //@@ Default CPU Execution Provider - //@@ - message ExecutionAccelerators - { - //@@ - //@@ .. cpp:var:: message Accelerator - //@@ - //@@ Specify the accelerator to be used to execute the model. - //@@ Accelerator with the same name may accept different parameters - //@@ depending on the backends. - //@@ - message Accelerator - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the execution accelerator. - //@@ - string name = 1; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Additional paremeters used to configure the accelerator. - //@@ - map parameters = 2; - } - - //@@ .. cpp:var:: Accelerator gpu_execution_accelerator (repeated) - //@@ - //@@ The preferred execution provider to be used if the model instance - //@@ is deployed on GPU. - //@@ - //@@ For ONNX Runtime backend, possible value is "tensorrt" as name, - //@@ and no parameters are required. - //@@ - //@@ For TensorFlow backend, possible values are "tensorrt", - //@@ "auto_mixed_precision", "gpu_io". - //@@ - //@@ For "tensorrt", the following parameters can be specified: - //@@ "precision_mode": The precision used for optimization. - //@@ Allowed values are "FP32" and "FP16". Default value is "FP32". - //@@ - //@@ "max_cached_engines": The maximum number of cached TensorRT - //@@ engines in dynamic TensorRT ops. Default value is 100. - //@@ - //@@ "minimum_segment_size": The smallest model subgraph that will - //@@ be considered for optimization by TensorRT. Default value is 3. - //@@ - //@@ "max_workspace_size_bytes": The maximum GPU memory the model - //@@ can use temporarily during execution. Default value is 1GB. - //@@ - //@@ For "auto_mixed_precision", no parameters are required. If set, - //@@ the model will try to use FP16 for better performance. - //@@ This optimization can not be set with "tensorrt". - //@@ - //@@ For "gpu_io", no parameters are required. If set, the model will - //@@ be executed using TensorFlow Callable API to set input and output - //@@ tensors in GPU memory if possible, which can reduce data transfer - //@@ overhead if the model is used in ensemble. However, the Callable - //@@ object will be created on model creation and it will request all - //@@ outputs for every model execution, which may impact the - //@@ performance if a request does not require all outputs. This - //@@ optimization will only take affect if the model instance is - //@@ created with KIND_GPU. - //@@ - repeated Accelerator gpu_execution_accelerator = 1; - - //@@ .. cpp:var:: Accelerator cpu_execution_accelerator (repeated) - //@@ - //@@ The preferred execution provider to be used if the model instance - //@@ is deployed on CPU. - //@@ - //@@ For ONNX Runtime backend, possible value is "openvino" as name, - //@@ and no parameters are required. 
- //@@ - repeated Accelerator cpu_execution_accelerator = 2; - } - - //@@ - //@@ .. cpp:var:: message PinnedMemoryBuffer - //@@ - //@@ Specify whether to use a pinned memory buffer when transferring data - //@@ between non-pinned system memory and GPU memory. Using a pinned - //@@ memory buffer for system from/to GPU transfers will typically provide - //@@ increased performance. For example, in the common use case where the - //@@ request provides inputs and delivers outputs via non-pinned system - //@@ memory, if the model instance accepts GPU IOs, the inputs will be - //@@ processed by two copies: from non-pinned system memory to pinned - //@@ memory, and from pinned memory to GPU memory. Similarly, pinned - //@@ memory will be used for delivering the outputs. - //@@ - message PinnedMemoryBuffer - { - //@@ .. cpp:var:: bool enable - //@@ - //@@ Use pinned memory buffer. Default is true. - //@@ - bool enable = 1; - } - - //@@ .. cpp:var:: Graph graph - //@@ - //@@ The graph optimization setting for the model. Optional. - //@@ - Graph graph = 1; - - //@@ .. cpp:var:: ModelPriority priority - //@@ - //@@ The priority setting for the model. Optional. - //@@ - ModelPriority priority = 2; - - //@@ .. cpp:var:: Cuda cuda - //@@ - //@@ CUDA-specific optimization settings. Optional. - //@@ - Cuda cuda = 3; - - //@@ .. cpp:var:: ExecutionAccelerators execution_accelerators - //@@ - //@@ The accelerators used for the model. Optional. - //@@ - ExecutionAccelerators execution_accelerators = 4; - - //@@ .. cpp:var:: PinnedMemoryBuffer input_pinned_memory - //@@ - //@@ Use pinned memory buffer when the data transfer for inputs - //@@ is between GPU memory and non-pinned system memory. - //@@ Default is true. - //@@ - PinnedMemoryBuffer input_pinned_memory = 5; - - //@@ .. cpp:var:: PinnedMemoryBuffer output_pinned_memory - //@@ - //@@ Use pinned memory buffer when the data transfer for outputs - //@@ is between GPU memory and non-pinned system memory. - //@@ Default is true. - //@@ - PinnedMemoryBuffer output_pinned_memory = 6; - - //@@ .. cpp:var:: uint32 gather_kernel_buffer_threshold - //@@ - //@@ The backend may use a gather kernel to gather input data if the - //@@ device has direct access to the source buffer and the destination - //@@ buffer. In such case, the gather kernel will be used only if the - //@@ number of buffers to be gathered is greater or equal to - //@@ the specifed value. If 0, the gather kernel will be disabled. - //@@ Default value is 0. - //@@ Currently only recognized by TensorRT backend. - //@@ - uint32 gather_kernel_buffer_threshold = 7; - - //@@ .. cpp:var:: bool eager_batching - //@@ - //@@ Start preparing the next batch before the model instance is ready - //@@ for the next inference. This option can be used to overlap the - //@@ batch preparation with model execution, with the trade-off that - //@@ the next batch might be smaller than what it could have been. - //@@ Default value is false. - //@@ Currently only recognized by TensorRT backend. - //@@ - bool eager_batching = 8; -} - -//@@ -//@@.. cpp:var:: message ModelQueuePolicy -//@@ -//@@ Queue policy for inference requests. -//@@ -message ModelQueuePolicy -{ - //@@ - //@@ .. cpp:enum:: TimeoutAction - //@@ - //@@ The action applied to timed-out requests. - //@@ - enum TimeoutAction { - //@@ .. cpp:enumerator:: Action::REJECT = 0 - //@@ - //@@ Reject the request and return error message accordingly. - //@@ - REJECT = 0; - - //@@ .. 
cpp:enumerator:: Action::DELAY = 1 - //@@ - //@@ Delay the request until all other requests at the same - //@@ (or higher) priority levels that have not reached their timeouts - //@@ are processed. A delayed request will eventually be processed, - //@@ but may be delayed indefinitely due to newly arriving requests. - //@@ - DELAY = 1; - } - - //@@ - //@@ .. cpp:var:: TimeoutAction timeout_action - //@@ - //@@ The action applied to timed-out request. - //@@ The default action is REJECT. - //@@ - TimeoutAction timeout_action = 1; - - //@@ - //@@ .. cpp:var:: uint64 default_timeout_microseconds - //@@ - //@@ The default timeout for every request, in microseconds. - //@@ The default value is 0 which indicates that no timeout is set. - //@@ - uint64 default_timeout_microseconds = 2; - - //@@ - //@@ .. cpp:var:: bool allow_timeout_override - //@@ - //@@ Whether individual request can override the default timeout value. - //@@ When true, individual requests can set a timeout that is less than - //@@ the default timeout value but may not increase the timeout. - //@@ The default value is false. - //@@ - bool allow_timeout_override = 3; - - //@@ - //@@ .. cpp:var:: uint32 max_queue_size - //@@ - //@@ The maximum queue size for holding requests. A request will be - //@@ rejected immediately if it can't be enqueued because the queue is - //@@ full. The default value is 0 which indicates that no maximum - //@@ queue size is enforced. - //@@ - uint32 max_queue_size = 4; -} - -//@@ -//@@.. cpp:var:: message ModelDynamicBatching -//@@ -//@@ Dynamic batching configuration. These settings control how dynamic -//@@ batching operates for the model. -//@@ -message ModelDynamicBatching -{ - //@@ .. cpp:var:: int32 preferred_batch_size (repeated) - //@@ - //@@ Preferred batch sizes for dynamic batching. If a batch of one of - //@@ these sizes can be formed it will be executed immediately. If - //@@ not specified a preferred batch size will be chosen automatically - //@@ based on model and GPU characteristics. - //@@ - repeated int32 preferred_batch_size = 1; - - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a request will be delayed in - //@@ the scheduling queue to wait for additional requests for - //@@ batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 2; - - //@@ .. cpp:var:: bool preserve_ordering - //@@ - //@@ Should the dynamic batcher preserve the ordering of responses to - //@@ match the order of requests received by the scheduler. Default is - //@@ false. If true, the responses will be returned in the same order as - //@@ the order of requests sent to the scheduler. If false, the responses - //@@ may be returned in arbitrary order. This option is specifically - //@@ needed when a sequence of related inference requests (i.e. inference - //@@ requests with the same correlation ID) are sent to the dynamic - //@@ batcher to ensure that the sequence responses are in the correct - //@@ order. - //@@ - bool preserve_ordering = 3; - - //@@ .. cpp:var:: uint32 priority_levels - //@@ - //@@ The number of priority levels to be enabled for the model, - //@@ the priority level starts from 1 and 1 is the highest priority. - //@@ Requests are handled in priority order with all priority 1 requests - //@@ processed before priority 2, all priority 2 requests processed before - //@@ priority 3, etc. Requests with the same priority level will be - //@@ handled in the order that they are received. - //@@ - uint32 priority_levels = 4; - - //@@ .. 
cpp:var:: uint32 default_priority_level - //@@ - //@@ The priority level used for requests that don't specify their - //@@ priority. The value must be in the range [ 1, 'priority_levels' ]. - //@@ - uint32 default_priority_level = 5; - - //@@ .. cpp:var:: ModelQueuePolicy default_queue_policy - //@@ - //@@ The default queue policy used for requests that don't require - //@@ priority handling and requests that specify priority levels where - //@@ there is no specific policy given. If not specified, a policy with - //@@ default field values will be used. - //@@ - ModelQueuePolicy default_queue_policy = 6; - - //@@ .. cpp:var:: map priority_queue_policy - //@@ - //@@ Specify the queue policy for the priority level. The default queue - //@@ policy will be used if a priority level doesn't specify a queue - //@@ policy. - //@@ - map priority_queue_policy = 7; -} - -//@@ -//@@.. cpp:var:: message ModelSequenceBatching -//@@ -//@@ Sequence batching configuration. These settings control how sequence -//@@ batching operates for the model. -//@@ -message ModelSequenceBatching -{ - //@@ .. cpp:var:: message Control - //@@ - //@@ A control is a signal that the sequence batcher uses to - //@@ communicate with a backend. - //@@ - message Control - { - //@@ - //@@ .. cpp:enum:: Kind - //@@ - //@@ The kind of the control. - //@@ - enum Kind { - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_START = 0 - //@@ - //@@ A new sequence is/is-not starting. If true a sequence is - //@@ starting, if false a sequence is continuing. Must - //@@ specify either int32_false_true, fp32_false_true or - //@@ bool_false_true for this control. This control is optional. - //@@ - CONTROL_SEQUENCE_START = 0; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_READY = 1 - //@@ - //@@ A sequence is/is-not ready for inference. If true the - //@@ input tensor data is valid and should be used. If false - //@@ the input tensor data is invalid and inferencing should - //@@ be "skipped". Must specify either int32_false_true, - //@@ fp32_false_true or bool_false_true for this control. This - //@@ control is optional. - //@@ - CONTROL_SEQUENCE_READY = 1; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_END = 2 - //@@ - //@@ A sequence is/is-not ending. If true a sequence is - //@@ ending, if false a sequence is continuing. Must specify - //@@ either int32_false_true, fp32_false_true or bool_false_true - //@@ for this control. This control is optional. - //@@ - CONTROL_SEQUENCE_END = 2; - - //@@ .. cpp:enumerator:: Kind::CONTROL_SEQUENCE_CORRID = 3 - //@@ - //@@ The correlation ID of the sequence. The correlation ID - //@@ is an uint64_t value that is communicated in whole or - //@@ in part by the tensor. The tensor's datatype must be - //@@ specified by data_type and must be TYPE_UINT64, TYPE_INT64, - //@@ TYPE_UINT32 or TYPE_INT32. If a 32-bit datatype is specified - //@@ the correlation ID will be truncated to the low-order 32 - //@@ bits. This control is optional. - //@@ - CONTROL_SEQUENCE_CORRID = 3; - } - - //@@ .. cpp:var:: Kind kind - //@@ - //@@ The kind of this control. - //@@ - Kind kind = 1; - - //@@ .. cpp:var:: int32 int32_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in an int32 tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'int32_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated int32 int32_false_true = 2; - - //@@ .. 
cpp:var:: float fp32_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in a fp32 tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'fp32_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated float fp32_false_true = 3; - - //@@ .. cpp:var:: bool bool_false_true (repeated) - //@@ - //@@ The control's true and false setting is indicated by setting - //@@ a value in a bool tensor. The tensor must be a - //@@ 1-dimensional tensor with size equal to the batch size of - //@@ the request. 'bool_false_true' must have two entries: the - //@@ first the false value and the second the true value. - //@@ - repeated bool bool_false_true = 5; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The control's datatype. - //@@ - DataType data_type = 4; - } - - //@@ .. cpp:var:: message ControlInput - //@@ - //@@ The sequence control values to communicate by a model input. - //@@ - message ControlInput - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model input. - //@@ - string name = 1; - - //@@ .. cpp:var:: Control control (repeated) - //@@ - //@@ The control value(s) that should be communicated to the - //@@ model using this model input. - //@@ - repeated Control control = 2; - } - - //@@ - //@@ .. cpp:var:: message InitialState - //@@ - //@@ Settings used to initialize data for implicit state. - //@@ - message InitialState - { - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the state. - //@@ - DataType data_type = 1; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The shape of the state tensor, not including the batch dimension. - //@@ - repeated int64 dims = 2; - - //@@ .. cpp:var:: oneof state_data - //@@ - //@@ Specify how the initial state data is generated. - //@@ - oneof state_data - { - //@@ - //@@ .. cpp:var:: bool zero_data - //@@ - //@@ The identifier for using zeros as initial state data. - //@@ Note that the value of 'zero_data' will not be checked, - //@@ instead, zero data will be used as long as the field is set. - //@@ - bool zero_data = 3; - - //@@ .. cpp:var:: string data_file - //@@ - //@@ The file whose content will be used as the initial data for - //@@ the state in row-major order. The file must be provided in - //@@ sub-directory 'initial_state' under the model directory. - //@@ - string data_file = 4; - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the state initialization. - //@@ - string name = 5; - } - - //@@ .. cpp:var:: message State - //@@ - //@@ An input / output pair of tensors that carry state for the sequence. - //@@ - message State - { - //@@ .. cpp:var:: string input_name - //@@ - //@@ The name of the model state input. - //@@ - string input_name = 1; - - //@@ .. cpp:var:: string output_name - //@@ - //@@ The name of the model state output. - //@@ - string output_name = 2; - - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the state. - //@@ - DataType data_type = 3; - - //@@ .. cpp:var:: int64 dim (repeated) - //@@ - //@@ The dimension. - //@@ - repeated int64 dims = 4; - - //@@ .. cpp:var:: InitialState initial_state (repeated) - //@@ - //@@ The optional field to specify the initial state for the model. - //@@ - repeated InitialState initial_state = 5; - } - - //@@ .. cpp:var:: message StrategyDirect - //@@ - //@@ The sequence batcher uses a specific, unique batch - //@@ slot for each sequence. 
All inference requests in a - //@@ sequence are directed to the same batch slot in the same - //@@ model instance over the lifetime of the sequence. This - //@@ is the default strategy. - //@@ - message StrategyDirect - { - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a candidate request - //@@ will be delayed in the sequence batch scheduling queue to - //@@ wait for additional requests for batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 1; - - //@@ .. cpp:var:: float minimum_slot_utilization - //@@ - //@@ The minimum slot utilization that must be satisfied to - //@@ execute the batch before 'max_queue_delay_microseconds' expires. - //@@ For example, a value of 0.5 indicates that the batch should be - //@@ executed as soon as 50% or more of the slots are ready even if - //@@ the 'max_queue_delay_microseconds' timeout has not expired. - //@@ The default is 0.0, indicating that a batch will be executed - //@@ before 'max_queue_delay_microseconds' timeout expires if at least - //@@ one batch slot is ready. 'max_queue_delay_microseconds' will be - //@@ ignored unless minimum_slot_utilization is set to a non-zero - //@@ value. - //@@ - float minimum_slot_utilization = 2; - } - - //@@ .. cpp:var:: message StrategyOldest - //@@ - //@@ The sequence batcher maintains up to 'max_candidate_sequences' - //@@ candidate sequences. 'max_candidate_sequences' can be greater - //@@ than the model's 'max_batch_size'. For inferencing the batcher - //@@ chooses from the candidate sequences up to 'max_batch_size' - //@@ inference requests. Requests are chosen in an oldest-first - //@@ manner across all candidate sequences. A given sequence is - //@@ not guaranteed to be assigned to the same batch slot for - //@@ all inference requests of that sequence. - //@@ - message StrategyOldest - { - //@@ .. cpp:var:: int32 max_candidate_sequences - //@@ - //@@ Maximum number of candidate sequences that the batcher - //@@ maintains. Excess seqences are kept in an ordered backlog - //@@ and become candidates when existing candidate sequences - //@@ complete. - //@@ - int32 max_candidate_sequences = 1; - - //@@ .. cpp:var:: int32 preferred_batch_size (repeated) - //@@ - //@@ Preferred batch sizes for dynamic batching of candidate - //@@ sequences. If a batch of one of these sizes can be formed - //@@ it will be executed immediately. If not specified a - //@@ preferred batch size will be chosen automatically - //@@ based on model and GPU characteristics. - //@@ - repeated int32 preferred_batch_size = 2; - - //@@ .. cpp:var:: uint64 max_queue_delay_microseconds - //@@ - //@@ The maximum time, in microseconds, a candidate request - //@@ will be delayed in the dynamic batch scheduling queue to - //@@ wait for additional requests for batching. Default is 0. - //@@ - uint64 max_queue_delay_microseconds = 3; - } - - //@@ .. cpp:var:: oneof strategy_choice - //@@ - //@@ The strategy used by the sequence batcher. Default strategy - //@@ is 'direct'. - //@@ - oneof strategy_choice - { - //@@ .. cpp:var:: StrategyDirect direct - //@@ - //@@ StrategyDirect scheduling strategy. - //@@ - StrategyDirect direct = 3; - - //@@ .. cpp:var:: StrategyOldest oldest - //@@ - //@@ StrategyOldest scheduling strategy. - //@@ - StrategyOldest oldest = 4; - } - - //@@ .. cpp:var:: uint64 max_sequence_idle_microseconds - //@@ - //@@ The maximum time, in microseconds, that a sequence is allowed to - //@@ be idle before it is aborted. 
The inference server considers a - //@@ sequence idle when it does not have any inference request queued - //@@ for the sequence. If this limit is exceeded, the inference server - //@@ will free the sequence slot allocated by the sequence and make it - //@@ available for another sequence. If not specified (or specified as - //@@ zero) a default value of 1000000 (1 second) is used. - //@@ - uint64 max_sequence_idle_microseconds = 1; - - //@@ .. cpp:var:: ControlInput control_input (repeated) - //@@ - //@@ The model input(s) that the server should use to communicate - //@@ sequence start, stop, ready and similar control values to the - //@@ model. - //@@ - repeated ControlInput control_input = 2; - - //@@ .. cpp:var:: State state (repeated) - //@@ - //@@ The optional state that can be stored in Triton for performing - //@@ inference requests on a sequence. Each sequence holds an implicit - //@@ state local to itself. The output state tensor provided by the - //@@ model in 'output_name' field of the current inference request will - //@@ be transferred as an input tensor named 'input_name' in the next - //@@ request of the same sequence. The input state of the first request - //@@ in the sequence contains garbage data. - //@@ - repeated State state = 5; -} - -//@@ -//@@.. cpp:var:: message ModelEnsembling -//@@ -//@@ Model ensembling configuration. These settings specify the models that -//@@ compose the ensemble and how data flows between the models. -//@@ -message ModelEnsembling -{ - //@@ .. cpp:var:: message Step - //@@ - //@@ Each step specifies a model included in the ensemble, - //@@ maps ensemble tensor names to the model input tensors, - //@@ and maps model output tensors to ensemble tensor names - //@@ - message Step - { - //@@ .. cpp:var:: string model_name - //@@ - //@@ The name of the model to execute for this step of the ensemble. - //@@ - string model_name = 1; - - //@@ .. cpp:var:: int64 model_version - //@@ - //@@ The version of the model to use for inference. If -1 - //@@ the latest/most-recent version of the model is used. - //@@ - int64 model_version = 2; - - //@@ .. cpp:var:: map input_map - //@@ - //@@ Map from name of an input tensor on this step's model to ensemble - //@@ tensor name. The ensemble tensor must have the same data type and - //@@ shape as the model input. Each model input must be assigned to - //@@ one ensemble tensor, but the same ensemble tensor can be assigned - //@@ to multiple model inputs. - //@@ - map input_map = 3; - - //@@ .. cpp:var:: map output_map - //@@ - //@@ Map from name of an output tensor on this step's model to ensemble - //@@ tensor name. The data type and shape of the ensemble tensor will - //@@ be inferred from the model output. It is optional to assign all - //@@ model outputs to ensemble tensors. One ensemble tensor name - //@@ can appear in an output map only once. - //@@ - map output_map = 4; - } - - //@@ .. cpp:var:: Step step (repeated) - //@@ - //@@ The models and the input / output mappings used within the ensemble. - //@@ - repeated Step step = 1; -} - -//@@ -//@@.. cpp:var:: message ModelParameter -//@@ -//@@ A model parameter. -//@@ -message ModelParameter -{ - //@@ .. cpp:var:: string string_value - //@@ - //@@ The string value of the parameter. - //@@ - string string_value = 1; -} - -//@@ -//@@.. cpp:var:: message ModelWarmup -//@@ -//@@ Settings used to construct the request sample for model warmup. -//@@ -message ModelWarmup -{ - //@@ - //@@ .. cpp:var:: message Input - //@@ - //@@ Meta data associated with an input. 
- //@@ - message Input - { - //@@ .. cpp:var:: DataType data_type - //@@ - //@@ The data-type of the input. - //@@ - DataType data_type = 1; - - //@@ .. cpp:var:: int64 dims (repeated) - //@@ - //@@ The shape of the input tensor, not including the batch dimension. - //@@ - repeated int64 dims = 2; - - //@@ .. cpp:var:: oneof input_data_type - //@@ - //@@ Specify how the input data is generated. If the input has STRING - //@@ data type and 'random_data' is set, the data generation will fall - //@@ back to 'zero_data'. - //@@ - oneof input_data_type - { - //@@ - //@@ .. cpp:var:: bool zero_data - //@@ - //@@ The identifier for using zeros as input data. Note that the - //@@ value of 'zero_data' will not be checked, instead, zero data - //@@ will be used as long as the field is set. - //@@ - bool zero_data = 3; - - //@@ - //@@ .. cpp:var:: bool random_data - //@@ - //@@ The identifier for using random data as input data. Note that - //@@ the value of 'random_data' will not be checked, instead, - //@@ random data will be used as long as the field is set. - //@@ - bool random_data = 4; - - //@@ .. cpp:var:: string input_data_file - //@@ - //@@ The file whose content will be used as raw input data in - //@@ row-major order. The file must be provided in a sub-directory - //@@ 'warmup' under the model directory. The file contents should be - //@@ in binary format. For TYPE_STRING data-type, an element is - //@@ represented by a 4-byte unsigned integer giving the length - //@@ followed by the actual bytes. - //@@ - string input_data_file = 5; - } - } - - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the request sample. - //@@ - string name = 1; - - //@@ .. cpp:var:: uint32 batch_size - //@@ - //@@ The batch size of the inference request. This must be >= 1. For - //@@ models that don't support batching, batch_size must be 1. If - //@@ batch_size > 1, the 'inputs' specified below will be duplicated to - //@@ match the batch size requested. - //@@ - uint32 batch_size = 2; - - //@@ .. cpp:var:: map inputs - //@@ - //@@ The warmup meta data associated with every model input, including - //@@ control tensors. - //@@ - map inputs = 3; - - //@@ .. cpp:var:: uint32 count - //@@ - //@@ The number of iterations that this warmup sample will be executed. - //@@ For example, if this field is set to 2, 2 model executions using this - //@@ sample will be scheduled for warmup. Default value is 0 which - //@@ indicates that this sample will be used only once. - //@@ Note that for sequence model, 'count' may not work well - //@@ because the model often expect a valid sequence of requests which - //@@ should be represented by a series of warmup samples. 'count > 1' - //@@ essentially "resends" one of the sample, which may invalidate the - //@@ sequence and result in unexpected warmup failure. - //@@ - uint32 count = 4; -} - -//@@ -//@@ .. cpp:var:: message ModelOperations -//@@ -//@@ The metadata of libraries providing custom operations for this model. -//@@ -message ModelOperations -{ - //@@ .. cpp:var:: string op_library_filename (repeated) - //@@ - //@@ Optional paths of the libraries providing custom operations for - //@@ this model. Valid only for ONNX models. - //@@ - repeated string op_library_filename = 1; -} - -//@@ -//@@ .. cpp:var:: message ModelTransactionPolicy -//@@ -//@@ The specification that describes the nature of transactions -//@@ to be expected from the model. -//@@ -message ModelTransactionPolicy -{ - //@@ .. 
cpp:var:: bool decoupled - //@@ - //@@ Indicates whether responses generated by the model are decoupled with - //@@ the requests issued to it, which means the number of responses - //@@ generated by model may differ from number of requests issued, and - //@@ that the responses may be out of order relative to the order of - //@@ requests. The default is false, which means the model will generate - //@@ exactly one response for each request. - //@@ - bool decoupled = 1; -} - -//@@ -//@@.. cpp:var:: message ModelRepositoryAgents -//@@ -//@@ The repository agents for the model. -//@@ -message ModelRepositoryAgents -{ - //@@ - //@@ .. cpp:var:: message Agent - //@@ - //@@ A repository agent that should be invoked for the specified - //@@ repository actions for this model. - //@@ - message Agent - { - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the agent. - //@@ - string name = 1; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ The parameters for the agent. - //@@ - map parameters = 2; - } - - //@@ - //@@ .. cpp:var:: Agent agents (repeated) - //@@ - //@@ The ordered list of agents for the model. These agents will be - //@@ invoked in order to respond to repository actions occuring for the - //@@ model. - //@@ - repeated Agent agents = 1; -} - -//@@ -//@@.. cpp:var:: message ModelResponseCache -//@@ -//@@ The response cache setting for the model. -//@@ -message ModelResponseCache -{ - //@@ - //@@ .. cpp::var:: bool enable - //@@ - //@@ Whether or not to use response cache for the model. If True, the - //@@ responses from the model are cached and when identical request - //@@ is encountered, instead of going through the model execution, - //@@ the response from the cache is utilized. By default, response - //@@ cache is disabled for the models. - //@@ - bool enable = 1; -} - -//@@ -//@@.. cpp:var:: message ModelConfig -//@@ -//@@ A model configuration. -//@@ -message ModelConfig -{ - //@@ .. cpp:var:: string name - //@@ - //@@ The name of the model. - //@@ - string name = 1; - - //@@ .. cpp:var:: string platform - //@@ - //@@ The framework for the model. Possible values are - //@@ "tensorrt_plan", "tensorflow_graphdef", - //@@ "tensorflow_savedmodel", "onnxruntime_onnx", - //@@ "pytorch_libtorch". - //@@ - string platform = 2; - - //@@ .. cpp:var:: string backend - //@@ - //@@ The backend used by the model. - //@@ - string backend = 17; - - //@@ .. cpp:var:: ModelVersionPolicy version_policy - //@@ - //@@ Policy indicating which version(s) of the model will be served. - //@@ - ModelVersionPolicy version_policy = 3; - - //@@ .. cpp:var:: int32 max_batch_size - //@@ - //@@ Maximum batch size allowed for inference. This can only decrease - //@@ what is allowed by the model itself. A max_batch_size value of 0 - //@@ indicates that batching is not allowed for the model and the - //@@ dimension/shape of the input and output tensors must exactly - //@@ match what is specified in the input and output configuration. A - //@@ max_batch_size value > 0 indicates that batching is allowed and - //@@ so the model expects the input tensors to have an additional - //@@ initial dimension for the batching that is not specified in the - //@@ input (for example, if the model supports batched inputs of - //@@ 2-dimensional tensors then the model configuration will specify - //@@ the input shape as [ X, Y ] but the model will expect the actual - //@@ input tensors to have shape [ N, X, Y ]). 
For max_batch_size > 0 - //@@ returned outputs will also have an additional initial dimension - //@@ for the batch. - //@@ - int32 max_batch_size = 4; - - //@@ .. cpp:var:: ModelInput input (repeated) - //@@ - //@@ The inputs request by the model. - //@@ - repeated ModelInput input = 5; - - //@@ .. cpp:var:: ModelOutput output (repeated) - //@@ - //@@ The outputs produced by the model. - //@@ - repeated ModelOutput output = 6; - - //@@ .. cpp:var:: BatchInput batch_input (repeated) - //@@ - //@@ The model input(s) that the server should use to communicate - //@@ batch related values to the model. - //@@ - repeated BatchInput batch_input = 20; - - //@@ .. cpp:var:: BatchOutput batch_output (repeated) - //@@ - //@@ The outputs produced by the model that requires special handling - //@@ by the model backend. - //@@ - repeated BatchOutput batch_output = 21; - - //@@ .. cpp:var:: ModelOptimizationPolicy optimization - //@@ - //@@ Optimization configuration for the model. If not specified - //@@ then default optimization policy is used. - //@@ - ModelOptimizationPolicy optimization = 12; - - //@@ .. cpp:var:: oneof scheduling_choice - //@@ - //@@ The scheduling policy for the model. If not specified the - //@@ default scheduling policy is used for the model. The default - //@@ policy is to execute each inference request independently. - //@@ - oneof scheduling_choice - { - //@@ .. cpp:var:: ModelDynamicBatching dynamic_batching - //@@ - //@@ If specified, enables the dynamic-batching scheduling - //@@ policy. With dynamic-batching the scheduler may group - //@@ together independent requests into a single batch to - //@@ improve inference throughput. - //@@ - ModelDynamicBatching dynamic_batching = 11; - - //@@ .. cpp:var:: ModelSequenceBatching sequence_batching - //@@ - //@@ If specified, enables the sequence-batching scheduling - //@@ policy. With sequence-batching, inference requests - //@@ with the same correlation ID are routed to the same - //@@ model instance. Multiple sequences of inference requests - //@@ may be batched together into a single batch to - //@@ improve inference throughput. - //@@ - ModelSequenceBatching sequence_batching = 13; - - //@@ .. cpp:var:: ModelEnsembling ensemble_scheduling - //@@ - //@@ If specified, enables the model-ensembling scheduling - //@@ policy. With model-ensembling, inference requests - //@@ will be processed according to the specification, such as an - //@@ execution sequence of models. The input specified in this model - //@@ config will be the input for the ensemble, and the output - //@@ specified will be the output of the ensemble. - //@@ - ModelEnsembling ensemble_scheduling = 15; - } - - //@@ .. cpp:var:: ModelInstanceGroup instance_group (repeated) - //@@ - //@@ Instances of this model. If not specified, one instance - //@@ of the model will be instantiated on each available GPU. - //@@ - repeated ModelInstanceGroup instance_group = 7; - - //@@ .. cpp:var:: string default_model_filename - //@@ - //@@ Optional filename of the model file to use if a - //@@ compute-capability specific model is not specified in - //@@ :cpp:var:`cc_model_filenames`. If not specified the default name - //@@ is 'model.graphdef', 'model.savedmodel', 'model.plan' or - //@@ 'model.pt' depending on the model type. - //@@ - string default_model_filename = 8; - - //@@ .. cpp:var:: map cc_model_filenames - //@@ - //@@ Optional map from CUDA compute capability to the filename of - //@@ the model that supports that compute capability. 
The filename - //@@ refers to a file within the model version directory. - //@@ - map cc_model_filenames = 9; - - //@@ .. cpp:var:: map metric_tags - //@@ - //@@ Optional metric tags. User-specific key-value pairs for metrics - //@@ reported for this model. These tags are applied to the metrics - //@@ reported on the HTTP metrics port. - //@@ - map metric_tags = 10; - - //@@ .. cpp:var:: map parameters - //@@ - //@@ Optional model parameters. User-specified parameter values. - //@@ - map parameters = 14; - - //@@ .. cpp:var:: ModelWarmup model_warmup (repeated) - //@@ - //@@ Warmup setting of this model. If specified, all instances - //@@ will be run with the request samples in sequence before - //@@ serving the model. - //@@ This field can only be specified if the model is not an ensemble - //@@ model. - //@@ - repeated ModelWarmup model_warmup = 16; - - //@@ .. cpp:var:: ModelOperations model_operations - //@@ - //@@ Optional metadata of the libraries providing custom operations for - //@@ this model. - //@@ - ModelOperations model_operations = 18; - - //@@ .. cpp:var:: ModelTransactionPolicy model_transaction_policy - //@@ - //@@ Optional specification that describes the nature of transactions - //@@ to be expected from the model. - //@@ - ModelTransactionPolicy model_transaction_policy = 19; - - //@@ .. cpp:var:: ModelRepositoryAgents model_repository_agents - //@@ - //@@ Optional specification of the agent(s) that should be invoked - //@@ with repository actions are performed for this model. - //@@ - ModelRepositoryAgents model_repository_agents = 23; - - //@@ .. cpp:var:: ModelResponseCache response_cache - //@@ - //@@ Optional setting for utilizing the response cache for this - //@@ model. - //@@ - ModelResponseCache response_cache = 24; -} diff --git a/3rdparty/common-r22.12/src/async_work_queue.cc b/3rdparty/common-r22.12/src/async_work_queue.cc deleted file mode 100644 index ebd56b9a3c5b2876ac3caeebfff0ceb75083e715..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/src/async_work_queue.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
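
The ModelConfig fields documented above are normally populated from a `config.pbtxt` file or programmatically through the protobuf-generated API. Below is a minimal, illustrative C++ sketch that sets only fields documented above; the header name `model_config.pb.h` is the assumed protoc output for `model_config.proto`, and the model name, backend, and numeric values are placeholders:

```cpp
#include <google/protobuf/text_format.h>
#include <iostream>
#include <string>

#include "model_config.pb.h"  // assumed name of the header generated from model_config.proto

int main() {
  inference::ModelConfig config;
  config.set_name("my_model");        // placeholder model name
  config.set_backend("onnxruntime");  // placeholder backend
  config.set_max_batch_size(8);

  // Sequence batching with the "oldest" strategy and a 5 second idle timeout.
  auto* sb = config.mutable_sequence_batching();
  sb->mutable_oldest()->set_max_candidate_sequences(16);
  sb->set_max_sequence_idle_microseconds(5000000);

  // One warmup sample executed twice before the model is considered ready.
  auto* warmup = config.add_model_warmup();
  warmup->set_name("warmup_sample");
  warmup->set_batch_size(1);
  warmup->set_count(2);

  std::string text;
  google::protobuf::TextFormat::PrintToString(config, &text);
  std::cout << text;
  return 0;
}
```

The same fields map one-to-one onto the text-format `config.pbtxt` that the server reads from the model repository.
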
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/common/async_work_queue.h" - -namespace triton { namespace common { - -AsyncWorkQueue::~AsyncWorkQueue() -{ - GetSingleton()->thread_pool_.reset(); -} - -AsyncWorkQueue* -AsyncWorkQueue::GetSingleton() -{ - static AsyncWorkQueue singleton; - return &singleton; -} - -Error -AsyncWorkQueue::Initialize(size_t worker_count) -{ - if (worker_count < 1) { - return Error( - Error::Code::INVALID_ARG, - "Async work queue must be initialized with positive 'worker_count'"); - } - - static std::mutex init_mtx; - std::lock_guard lk(init_mtx); - - if (GetSingleton()->thread_pool_) { - return Error( - Error::Code::ALREADY_EXISTS, - "Async work queue has been initialized with " + - std::to_string(GetSingleton()->thread_pool_->Size()) + - " 'worker_count'"); - } - - GetSingleton()->thread_pool_.reset(new ThreadPool(worker_count)); - return Error::Success; -} - -size_t -AsyncWorkQueue::WorkerCount() -{ - if (!GetSingleton()->thread_pool_) { - return 0; - } - return GetSingleton()->thread_pool_->Size(); -} - -Error -AsyncWorkQueue::AddTask(std::function&& task) -{ - if (!GetSingleton()->thread_pool_) { - return Error( - Error::Code::UNAVAILABLE, - "Async work queue must be initialized before adding task"); - } - GetSingleton()->thread_pool_->Enqueue(std::move(task)); - - return Error::Success; -} - -void -AsyncWorkQueue::Reset() -{ - // Reconstruct the singleton to reset it - GetSingleton()->~AsyncWorkQueue(); - new (GetSingleton()) AsyncWorkQueue(); -} - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/src/error.cc b/3rdparty/common-r22.12/src/error.cc deleted file mode 100644 index b6da386fa162f246a682dd6f9aef34c22ceffd77..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/src/error.cc +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
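
The AsyncWorkQueue above is a process-wide singleton: `Initialize()` sizes the shared thread pool once, `AddTask()` enqueues work, and `Reset()` tears the singleton down. A small usage sketch, assuming `Initialize`, `AddTask`, and `WorkerCount` are the static interface declared in `async_work_queue.h`; the worker count and task bodies are arbitrary:

```cpp
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

#include "triton/common/async_work_queue.h"

int main() {
  using triton::common::AsyncWorkQueue;
  using triton::common::Error;

  // Size the shared pool once; a second call would return ALREADY_EXISTS.
  Error err = AsyncWorkQueue::Initialize(4 /* worker_count */);
  std::cout << "Initialize: " << err.AsString() << std::endl;

  std::atomic<int> done{0};
  for (int i = 0; i < 8; ++i) {
    AsyncWorkQueue::AddTask([&done] { ++done; });
  }

  // Crude wait for illustration only; real callers use their own synchronization.
  while (done.load() < 8) {
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }
  std::cout << AsyncWorkQueue::WorkerCount() << " workers, " << done.load()
            << " tasks done" << std::endl;
  return 0;
}
```
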
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/common/error.h" - -namespace triton { namespace common { - -const Error Error::Success(Error::Code::SUCCESS); - -std::string -Error::AsString() const -{ - std::string str(CodeString(code_)); - str += ": " + msg_; - return str; -} - -const char* -Error::CodeString(const Code code) -{ - switch (code) { - case Error::Code::SUCCESS: - return "OK"; - case Error::Code::UNKNOWN: - return "Unknown"; - case Error::Code::INTERNAL: - return "Internal"; - case Error::Code::NOT_FOUND: - return "Not found"; - case Error::Code::INVALID_ARG: - return "Invalid argument"; - case Error::Code::UNAVAILABLE: - return "Unavailable"; - case Error::Code::UNSUPPORTED: - return "Unsupported"; - case Error::Code::ALREADY_EXISTS: - return "Already exists"; - default: - break; - } - - return ""; -} - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/src/logging.cc b/3rdparty/common-r22.12/src/logging.cc deleted file mode 100644 index 67b01ba8ba1bd1253e7b1156f7346b662714c660..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/src/logging.cc +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/common/logging.h" - -#ifdef _WIN32 -// suppress the min and max definitions in Windef.h. 
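
Error is the status type these utilities hand back; `AsString()` simply prefixes the message with the code name returned by `CodeString()`. A short sketch using a hypothetical validation helper (`CheckBatchSize` is not part of the library):

```cpp
#include <iostream>

#include "triton/common/error.h"

using triton::common::Error;

// Hypothetical validation helper, not part of triton/common.
Error
CheckBatchSize(int batch_size)
{
  if (batch_size < 1) {
    return Error(Error::Code::INVALID_ARG, "batch size must be >= 1");
  }
  return Error::Success;
}

int main() {
  // Prints "Invalid argument: batch size must be >= 1".
  std::cout << CheckBatchSize(0).AsString() << std::endl;
  return 0;
}
```
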
-#define NOMINMAX -#include -#else -#include -#include -#include -#include -#endif -#include -#include -#include - -namespace triton { namespace common { - -Logger gLogger_; - -Logger::Logger() - : enables_{true, true, true}, vlevel_(0), format_(Format::kDEFAULT) -{ -} - -void -Logger::Log(const std::string& msg) -{ - const std::lock_guard lock(mutex_); - if (file_stream_.is_open()) { - file_stream_ << msg << std::endl; - } else { - std::cerr << msg << std::endl; - } -} - -void -Logger::Flush() -{ - std::cerr << std::flush; -} - - -const std::vector LogMessage::level_name_{'E', 'W', 'I'}; - -LogMessage::LogMessage(const char* file, int line, uint32_t level) -{ - std::string path(file); - size_t pos = path.rfind('/'); - if (pos != std::string::npos) { - path = path.substr(pos + 1, std::string::npos); - } - - // 'L' below is placeholder for showing log level - switch (gLogger_.LogFormat()) { - case Logger::Format::kDEFAULT: { - // LMMDD hh:mm:ss.ssssss -#ifdef _WIN32 - SYSTEMTIME system_time; - GetSystemTime(&system_time); - stream_ << level_name_[std::min(level, (uint32_t)Level::kINFO)] - << std::setfill('0') << std::setw(2) << system_time.wMonth - << std::setw(2) << system_time.wDay << ' ' << std::setw(2) - << system_time.wHour << ':' << std::setw(2) << system_time.wMinute - << ':' << std::setw(2) << system_time.wSecond << '.' - << std::setw(6) << system_time.wMilliseconds * 1000 << ' ' - << static_cast(GetCurrentProcessId()) << ' ' << path - << ':' << line << "] "; -#else - struct timeval tv; - gettimeofday(&tv, NULL); - struct tm tm_time; - gmtime_r(((time_t*)&(tv.tv_sec)), &tm_time); - stream_ << level_name_[std::min(level, (uint32_t)Level::kINFO)] - << std::setfill('0') << std::setw(2) << (tm_time.tm_mon + 1) - << std::setw(2) << tm_time.tm_mday << ' ' << std::setw(2) - << tm_time.tm_hour << ':' << std::setw(2) << tm_time.tm_min << ':' - << std::setw(2) << tm_time.tm_sec << '.' 
<< std::setw(6) - << tv.tv_usec << ' ' << static_cast(getpid()) << ' ' - << path << ':' << line << "] "; -#endif - break; - } - case Logger::Format::kISO8601: { - // YYYY-MM-DDThh:mm:ssZ L -#ifdef _WIN32 - SYSTEMTIME system_time; - GetSystemTime(&system_time); - stream_ << system_time.wYear << '-' << std::setfill('0') << std::setw(2) - << system_time.wMonth << '-' << std::setw(2) << system_time.wDay - << 'T' << std::setw(2) << system_time.wHour << ':' << std::setw(2) - << system_time.wMinute << ':' << std::setw(2) - << system_time.wSecond << "Z " - << level_name_[std::min(level, (uint32_t)Level::kINFO)] << ' ' - << static_cast(GetCurrentProcessId()) << ' ' << path - << ':' << line << "] "; -#else - struct timeval tv; - gettimeofday(&tv, NULL); - struct tm tm_time; - gmtime_r(((time_t*)&(tv.tv_sec)), &tm_time); - stream_ << (tm_time.tm_year + 1900) << '-' << std::setfill('0') - << std::setw(2) << (tm_time.tm_mon + 1) << '-' << std::setw(2) - << tm_time.tm_mday << 'T' << std::setw(2) << tm_time.tm_hour - << ':' << std::setw(2) << tm_time.tm_min << ':' << std::setw(2) - << tm_time.tm_sec << "Z " - << level_name_[std::min(level, (uint32_t)Level::kINFO)] << ' ' - << static_cast(getpid()) << ' ' << path << ':' << line - << "] "; -#endif - break; - } - } -} - -LogMessage::~LogMessage() -{ - gLogger_.Log(stream_.str()); -} - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/src/model_config.cc b/3rdparty/common-r22.12/src/model_config.cc deleted file mode 100644 index e459ef071f2cb35c7e27136ef74e9008f4b712a7..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/src/model_config.cc +++ /dev/null @@ -1,443 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "triton/common/model_config.h" - -namespace triton { namespace common { - -bool -IsFixedSizeDataType(const inference::DataType dtype) -{ - return dtype != inference::DataType::TYPE_STRING; -} - -size_t -GetDataTypeByteSize(const inference::DataType dtype) -{ - switch (dtype) { - case inference::DataType::TYPE_BOOL: - return 1; - case inference::DataType::TYPE_UINT8: - return 1; - case inference::DataType::TYPE_UINT16: - return 2; - case inference::DataType::TYPE_UINT32: - return 4; - case inference::DataType::TYPE_UINT64: - return 8; - case inference::DataType::TYPE_INT8: - return 1; - case inference::DataType::TYPE_INT16: - return 2; - case inference::DataType::TYPE_INT32: - return 4; - case inference::DataType::TYPE_INT64: - return 8; - case inference::DataType::TYPE_FP16: - return 2; - case inference::DataType::TYPE_FP32: - return 4; - case inference::DataType::TYPE_FP64: - return 8; - case inference::DataType::TYPE_STRING: - return 0; - case inference::DataType::TYPE_BF16: - return 2; - default: - break; - } - - return 0; -} - -int64_t -GetElementCount(const DimsList& dims) -{ - bool first = true; - int64_t cnt = 0; - for (auto dim : dims) { - if (dim == WILDCARD_DIM) { - return -1; - } - - if (first) { - cnt = dim; - first = false; - } else { - cnt *= dim; - } - } - - return cnt; -} - -int64_t -GetElementCount(const std::vector& dims) -{ - bool first = true; - int64_t cnt = 0; - for (auto dim : dims) { - if (dim == WILDCARD_DIM) { - return -1; - } - - if (first) { - cnt = dim; - first = false; - } else { - cnt *= dim; - } - } - - return cnt; -} - -int64_t -GetElementCount(const inference::ModelInput& mio) -{ - return GetElementCount(mio.dims()); -} - -int64_t -GetElementCount(const inference::ModelOutput& mio) -{ - return GetElementCount(mio.dims()); -} - -int64_t -GetByteSize(const inference::DataType& dtype, const DimsList& dims) -{ - size_t dt_size = GetDataTypeByteSize(dtype); - if (dt_size == 0) { - return -1; - } - - int64_t cnt = GetElementCount(dims); - if (cnt == -1) { - return -1; - } - - return cnt * dt_size; -} - -int64_t -GetByteSize(const inference::DataType& dtype, const std::vector& dims) -{ - size_t dt_size = GetDataTypeByteSize(dtype); - if (dt_size == 0) { - return -1; - } - - int64_t cnt = GetElementCount(dims); - if (cnt == -1) { - return -1; - } - - return cnt * dt_size; -} - -int64_t -GetByteSize( - const int batch_size, const inference::DataType& dtype, - const DimsList& dims) -{ - if (dims.size() == 0) { - return batch_size * GetDataTypeByteSize(dtype); - } - - int64_t bs = GetByteSize(dtype, dims); - if (bs == -1) { - return -1; - } - - return std::max(1, batch_size) * bs; -} - -int64_t -GetByteSize( - const int batch_size, const inference::DataType& dtype, - const std::vector& dims) -{ - if (dims.size() == 0) { - return batch_size * GetDataTypeByteSize(dtype); - } - - int64_t bs = GetByteSize(dtype, dims); - if (bs == -1) { - return -1; - } - - return std::max(1, batch_size) * bs; -} - -int64_t -GetByteSize(const inference::ModelInput& mio) -{ - return GetByteSize(mio.data_type(), mio.dims()); -} - -int64_t -GetByteSize(const inference::ModelOutput& mio) -{ - return GetByteSize(mio.data_type(), mio.dims()); -} - -int -GetCpuNiceLevel(const inference::ModelConfig& config) -{ - int nice = SCHEDULER_DEFAULT_NICE; - if (config.has_optimization()) { - switch (config.optimization().priority()) { - case inference::ModelOptimizationPolicy::PRIORITY_MAX: - nice = 0; - break; - case inference::ModelOptimizationPolicy::PRIORITY_MIN: - nice = 19; - break; - 
default: - nice = SCHEDULER_DEFAULT_NICE; - break; - } - } - - return nice; -} - -bool -CompareDims(const DimsList& dims0, const DimsList& dims1) -{ - if (dims0.size() != dims1.size()) { - return false; - } - - for (int i = 0; i < dims0.size(); ++i) { - if (dims0[i] != dims1[i]) { - return false; - } - } - - return true; -} - -bool -CompareDims( - const std::vector& dims0, const std::vector& dims1) -{ - if (dims0.size() != dims1.size()) { - return false; - } - - for (size_t i = 0; i < dims0.size(); ++i) { - if (dims0[i] != dims1[i]) { - return false; - } - } - - return true; -} - -bool -CompareDimsWithWildcard(const DimsList& dims0, const DimsList& dims1) -{ - if (dims0.size() != dims1.size()) { - return false; - } - - for (int i = 0; i < dims0.size(); ++i) { - if ((dims0[i] != WILDCARD_DIM) && (dims1[i] != WILDCARD_DIM) && - (dims0[i] != dims1[i])) { - return false; - } - } - - return true; -} - -bool -CompareDimsWithWildcard( - const DimsList& dims0, const std::vector& dims1) -{ - if (dims0.size() != (int64_t)dims1.size()) { - return false; - } - - for (int i = 0; i < dims0.size(); ++i) { - if ((dims0[i] != WILDCARD_DIM) && (dims1[i] != WILDCARD_DIM) && - (dims0[i] != dims1[i])) { - return false; - } - } - - return true; -} - -std::string -DimsListToString(const DimsList& dims) -{ - bool first = true; - - std::string str("["); - for (const auto& dim : dims) { - if (!first) { - str += ","; - } - str += std::to_string(dim); - first = false; - } - - str += "]"; - return str; -} - -std::string -DimsListToString(const std::vector& dims, const int start_idx) -{ - int idx = 0; - - std::string str("["); - for (const auto& dim : dims) { - if (idx >= start_idx) { - if (idx > start_idx) { - str += ","; - } - str += std::to_string(dim); - } - - idx++; - } - - str += "]"; - return str; -} - -const char* -DataTypeToProtocolString(const inference::DataType dtype) -{ - switch (dtype) { - case inference::DataType::TYPE_BOOL: - return "BOOL"; - case inference::DataType::TYPE_UINT8: - return "UINT8"; - case inference::DataType::TYPE_UINT16: - return "UINT16"; - case inference::DataType::TYPE_UINT32: - return "UINT32"; - case inference::DataType::TYPE_UINT64: - return "UINT64"; - case inference::DataType::TYPE_INT8: - return "INT8"; - case inference::DataType::TYPE_INT16: - return "INT16"; - case inference::DataType::TYPE_INT32: - return "INT32"; - case inference::DataType::TYPE_INT64: - return "INT64"; - case inference::DataType::TYPE_FP16: - return "FP16"; - case inference::DataType::TYPE_FP32: - return "FP32"; - case inference::DataType::TYPE_FP64: - return "FP64"; - case inference::DataType::TYPE_STRING: - return "BYTES"; - case inference::DataType::TYPE_BF16: - return "BF16"; - default: - break; - } - - return ""; -} - -inference::DataType -ProtocolStringToDataType(const std::string& dtype) -{ - return ProtocolStringToDataType(dtype.c_str(), dtype.size()); -} - -inference::DataType -ProtocolStringToDataType(const char* dtype, size_t len) -{ - if (len < 4 || len > 6) { - return inference::DataType::TYPE_INVALID; - } - - if ((*dtype == 'I') && (len != 6)) { - if ((dtype[1] == 'N') && (dtype[2] == 'T')) { - if ((dtype[3] == '8') && (len == 4)) { - return inference::DataType::TYPE_INT8; - } else if ((dtype[3] == '1') && (dtype[4] == '6')) { - return inference::DataType::TYPE_INT16; - } else if ((dtype[3] == '3') && (dtype[4] == '2')) { - return inference::DataType::TYPE_INT32; - } else if ((dtype[3] == '6') && (dtype[4] == '4')) { - return inference::DataType::TYPE_INT64; - } - } - } else if ((*dtype == 
'U') && (len != 4)) { - if ((dtype[1] == 'I') && (dtype[2] == 'N') && (dtype[3] == 'T')) { - if ((dtype[4] == '8') && (len == 5)) { - return inference::DataType::TYPE_UINT8; - } else if ((dtype[4] == '1') && (dtype[5] == '6')) { - return inference::DataType::TYPE_UINT16; - } else if ((dtype[4] == '3') && (dtype[5] == '2')) { - return inference::DataType::TYPE_UINT32; - } else if ((dtype[4] == '6') && (dtype[5] == '4')) { - return inference::DataType::TYPE_UINT64; - } - } - } else if ((*dtype == 'F') && (dtype[1] == 'P') && (len == 4)) { - if ((dtype[2] == '1') && (dtype[3] == '6')) { - return inference::DataType::TYPE_FP16; - } else if ((dtype[2] == '3') && (dtype[3] == '2')) { - return inference::DataType::TYPE_FP32; - } else if ((dtype[2] == '6') && (dtype[3] == '4')) { - return inference::DataType::TYPE_FP64; - } - } else if (*dtype == 'B') { - switch (dtype[1]) { - case 'Y': - if (!strcmp(dtype + 2, "TES")) { - return inference::DataType::TYPE_STRING; - } - break; - case 'O': - if (!strcmp(dtype + 2, "OL")) { - return inference::DataType::TYPE_BOOL; - } - break; - case 'F': - if (!strcmp(dtype + 2, "16")) { - return inference::DataType::TYPE_BF16; - } - break; - } - } - - return inference::DataType::TYPE_INVALID; -} - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/src/table_printer.cc b/3rdparty/common-r22.12/src/table_printer.cc deleted file mode 100644 index 779f49921f8cd43c8e1543c465d0819ae1413989..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/src/table_printer.cc +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/common/table_printer.h" - -#ifdef _WIN32 -// suppress the min and max definitions in Windef.h. -#define NOMINMAX -#include -#else -#include -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -namespace triton { namespace common { - -// -// ASCII table printer. 
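
The model_config helpers above are shape and data-type bookkeeping: `GetElementCount()` propagates the wildcard dimension, `GetByteSize()` multiplies the element count by the per-element size, and `ProtocolStringToDataType()` parses the wire names ("FP32", "BYTES", ...). A short sketch, assuming the `std::vector<int64_t>` overloads and the `WILDCARD_DIM` constant declared in `model_config.h`:

```cpp
#include <iostream>
#include <vector>

#include "triton/common/model_config.h"

int main() {
  using namespace triton::common;

  std::vector<int64_t> fixed{3, 224, 224};
  std::vector<int64_t> dynamic{WILDCARD_DIM, 224, 224};  // first dim unknown

  // 3 * 224 * 224 elements * 4 bytes for FP32 = 602112 bytes.
  std::cout << GetByteSize(inference::DataType::TYPE_FP32, fixed) << "\n";

  // Any wildcard dimension makes the element count (and byte size) unknown: -1.
  std::cout << GetElementCount(dynamic) << "\n";

  // "BYTES" is the protocol name for TYPE_STRING.
  std::cout << (ProtocolStringToDataType("BYTES") ==
                inference::DataType::TYPE_STRING)
            << "\n";
  return 0;
}
```
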
-// -void -TablePrinter::InsertRow(const std::vector& row) -{ - std::vector> table_row; - - // Number of lines in each field in the record - size_t max_height = 0; - - // Update max length of data items in each row - for (size_t i = 0; i < row.size(); ++i) { - table_row.push_back(std::vector{}); - std::stringstream ss(row[i]); - std::string line; - - size_t max_width = 0; - while (std::getline(ss, line, '\n')) { - table_row[i].push_back(line); - if (line.size() > max_width) - max_width = line.size(); - } - - if (max_width > max_widths_[i]) - max_widths_[i] = max_width; - - size_t number_of_lines = table_row[i].size(); - if (max_height < number_of_lines) - max_height = number_of_lines; - } - - max_heights_.push_back(max_height); - data_.emplace_back(table_row); -} - -void -TablePrinter::FairShare() -{ - // initialize original index locations - size_t array_size = max_widths_.size(); - std::vector idx(array_size); - iota(idx.begin(), idx.end(), 0); - - stable_sort(idx.begin(), idx.end(), [this](size_t i1, size_t i2) { - return this->max_widths_[i1] < this->max_widths_[i2]; - }); - - size_t loop_index = 1; - for (auto itr = idx.begin(); itr != idx.end(); ++itr) { - // If a column is not using all the space allocated to it - if (max_widths_[*itr] < shares_[*itr]) { - float excess = shares_[*itr] - max_widths_[*itr]; - shares_[*itr] -= excess; - - if (itr == idx.end() - 1) - break; - auto update_itr = idx.begin() + (itr - idx.begin() + 1); - - // excess amount of unused space that must be distributed evenly to the - // next columns - float excess_per_column = excess / (array_size - loop_index); - - for (; update_itr != idx.end(); ++update_itr) { - shares_[*update_itr] += excess_per_column; - excess -= excess_per_column; - } - } - ++loop_index; - } - - // Remove any decimal shares - for (auto itr = idx.begin(); itr != idx.end(); ++itr) { - shares_[*itr] = (size_t)shares_[*itr]; - } - - // For each record - for (size_t i = 0; i < data_.size(); i++) { - auto current_row = data_[i]; - - // For each field in the record - for (size_t j = 0; j < current_row.size(); j++) { - // For each line in the record - for (size_t line_index = 0; line_index < current_row[j].size(); - line_index++) { - std::string line = current_row[j][line_index]; - size_t num_rows = (line.size() + shares_[j] - 1) / shares_[j]; - - // If the number of rows required for this record is larger than 1, we - // will break that line and put it in multiple lines - if (num_rows > 1) { - // Remove the multi-line field, it will be replaced by the line - // that can fits the column size - data_[i][j].erase(data_[i][j].begin() + line_index); - for (size_t k = 0; k < num_rows; k++) { - size_t start_index = - std::min((size_t)(k * shares_[j]), line.size()); - size_t end_index = - std::min((size_t)((k + 1) * shares_[j]), line.size()); - data_[i][j].insert( - data_[i][j].begin() + line_index + k, - line.substr(start_index, end_index - start_index)); - } - - // We need to advance the index for the splitted lines. 
- line_index += num_rows - 1; - } - - if (max_heights_[i] < (num_rows - 1 + current_row[j].size())) - max_heights_[i] += num_rows - 1; - } - } - } -} - -void -TablePrinter::AddRow(std::stringstream& table, size_t row_index) -{ - auto row = data_[row_index]; - size_t max_height = max_heights_[row_index]; - - for (size_t j = 0; j < max_height; j++) { - table << "|" << std::left; - - for (size_t i = 0; i < row.size(); i++) { - if (j < row[i].size()) - table << " " << std::setw(shares_[i]) << row[i][j] << " |"; - else - table << " " << std::setw(shares_[i]) << " " - << " |"; - } - - // Do not add new line if this is the last row of this record - if (j != max_height - 1) - table << "\n"; - } - table << "\n"; -} - -void -TablePrinter::AddRowDivider(std::stringstream& table) -{ - table << "+"; - for (const auto& share : shares_) { - for (size_t i = 0; i < share + 2; i++) table << "-"; - table << "+"; - } - table << "\n"; -} - -std::string -TablePrinter::PrintTable() -{ - std::stringstream table; - table << "\n"; - - FairShare(); - - AddRowDivider(table); - // Add table headers - AddRow(table, 0); - AddRowDivider(table); - - for (size_t j = 1; j < data_.size(); j++) { - AddRow(table, j); - } - - AddRowDivider(table); - - return table.str(); -} - -// TablePrinter will take the ownership of `headers`. -TablePrinter::TablePrinter(const std::vector& headers) -{ - // terminal size - size_t column_size = 500; -#ifdef _WIN32 - CONSOLE_SCREEN_BUFFER_INFO csbi; - int ret = GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); - if (ret && (csbi.dwSize.X != 0)) { - column_size = csbi.dwSize.X; - } -#else - struct winsize terminal_size; - int status = ioctl(STDOUT_FILENO, TIOCGWINSZ, &terminal_size); - if ((status == 0) && (terminal_size.ws_col != 0)) { - column_size = terminal_size.ws_col; - } -#endif - - for (size_t i = 0; i < headers.size(); ++i) { - max_widths_.emplace_back(0); - } - - // Calculate fair share of every column - size_t number_of_columns = headers.size(); - - // Terminal width is the actual terminal width minus two times spaces - // required before and after each column and number of columns plus 1 for - // the pipes between the columns - size_t terminal_width = - column_size - (2 * number_of_columns) - (number_of_columns + 1); - int equal_share = terminal_width / headers.size(); - - for (size_t i = 0; i < headers.size(); ++i) { - shares_.emplace_back(equal_share); - terminal_width -= equal_share; - } - - InsertRow(headers); -} - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/src/thread_pool.cc b/3rdparty/common-r22.12/src/thread_pool.cc deleted file mode 100644 index 8f53db71d0430294208b157f9a5c2627fdeac296..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/src/thread_pool.cc +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
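
TablePrinter is what renders the ASCII status tables: the constructor takes the column headers, `InsertRow()` adds records (cells may contain embedded newlines), and `PrintTable()` lays everything out against the detected terminal width. A minimal sketch, assuming the header and row element type is `std::string`:

```cpp
#include <iostream>
#include <string>
#include <vector>

#include "triton/common/table_printer.h"

int main() {
  std::vector<std::string> headers{"Model", "Version", "Status"};
  triton::common::TablePrinter table(headers);

  table.InsertRow({"resnet50", "1", "READY"});
  table.InsertRow({"bert", "2", "UNAVAILABLE:\nunexpected error"});  // two-line cell

  std::cout << table.PrintTable();
  return 0;
}
```
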
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "triton/common/thread_pool.h" -#include - -namespace triton { namespace common { - -ThreadPool::ThreadPool(size_t thread_count) -{ - if (!thread_count) { - throw std::invalid_argument("Thread count must be greater than zero."); - } - - // Define infinite loop for each thread to wait for a task to complete - const auto worker_loop = [this]() { - while (true) { - Task task; - { - std::unique_lock lk(queue_mtx_); - // Wake if there's a task to do, or the pool has been stopped. - cv_.wait(lk, [&]() { return !task_queue_.empty() || stop_; }); - // Exit condition - if (stop_ && task_queue_.empty()) { - break; - } - task = std::move(task_queue_.front()); - task_queue_.pop(); - } - - // Execute task - ensure function has a valid target - if (task) { - task(); - } - } - }; - - workers_.reserve(thread_count); - for (size_t i = 0; i < thread_count; ++i) { - workers_.emplace_back(worker_loop); - } -} - -ThreadPool::~ThreadPool() -{ - { - std::lock_guard lk(queue_mtx_); - // Signal to each worker that it should exit loop when tasks are finished - stop_ = true; - } - // Wake all threads to clean up - cv_.notify_all(); - for (auto& t : workers_) { - t.join(); - } -} - -void -ThreadPool::Enqueue(Task&& task) -{ - { - std::lock_guard lk(queue_mtx_); - // Don't accept more work if pool is shutting down - if (stop_) { - return; - } - task_queue_.push(std::move(task)); - } - // Only wake one thread per task - // Todo: DLIS-3859 if ThreadPool gets used more. - cv_.notify_one(); -} - -}} // namespace triton::common diff --git a/3rdparty/common-r22.12/tools/format.py b/3rdparty/common-r22.12/tools/format.py deleted file mode 100644 index 84649d3c2f9501a5f6caf1b85baa12b90e4c3e03..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/tools/format.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/python - -# Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
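
ThreadPool is the lower-level primitive that AsyncWorkQueue wraps: a fixed set of workers drains a task queue, `Enqueue()` refuses new work once shutdown has begun, and the destructor wakes every worker and joins them after the queue is empty. A small sketch, assuming `Task` is the `std::function<void(void)>` alias declared in `thread_pool.h`; the task bodies are arbitrary:

```cpp
#include <atomic>
#include <iostream>

#include "triton/common/thread_pool.h"

int main() {
  std::atomic<int> sum{0};
  {
    triton::common::ThreadPool pool(4);  // throws std::invalid_argument for 0
    for (int i = 1; i <= 100; ++i) {
      pool.Enqueue([&sum, i] { sum += i; });
    }
  }  // destructor joins only after all queued tasks have run
  std::cout << sum.load() << std::endl;  // 5050
  return 0;
}
```
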
-# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import argparse -import os -import subprocess -import yapf - -FLAGS = None -FORMAT_EXTS = ('proto', 'cc', 'cu', 'h') -SKIP_PATHS = ('tools',) - - -def visit(path): - if FLAGS.verbose: - print("visiting " + path) - - valid_ext = False - python_file = False - for ext in FORMAT_EXTS: - if path.endswith('.' + ext): - valid_ext = True - break - if path.endswith('.py'): - valid_ext = True - python_file = True - if not valid_ext: - if FLAGS.verbose: - print("skipping due to extension: " + path) - return True - - for skip in SKIP_PATHS: - if path.startswith(skip): - if FLAGS.verbose: - print("skipping due to path prefix: " + path) - return True - if python_file: - yapf.yapflib.yapf_api.FormatFile(path, - in_place=True, - style_config='google') - return True - else: - args = ['clang-format-6.0', '--style=file', '-i'] - if FLAGS.verbose: - args.append('-verbose') - args.append(path) - - ret = subprocess.call(args) - if ret != 0: - print("format failed for " + path) - return False - - return True - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('-v', - '--verbose', - action="store_true", - required=False, - default=False, - help='Enable verbose output') - parser.add_argument('paths', - type=str, - nargs='*', - default=None, - help='Directories or files to format') - FLAGS = parser.parse_args() - - # Check the version of yapf. Needs a consistent version - # of yapf to prevent unneccessary changes in the code. - if (yapf.__version__ != '0.30.0'): - print("Needs yapf 0.30.0, but got yapf {}".format(yapf.__version__)) - - if (FLAGS.paths is None) or (len(FLAGS.paths) == 0): - parser.print_help() - exit(1) - - ret = True - for path in FLAGS.paths: - if not os.path.isdir(path): - if not visit(path): - ret = False - else: - for root, dirs, files in os.walk(path): - for name in files: - if not visit(os.path.join(root, name)): - ret = False - - exit(0 if ret else 1) diff --git a/3rdparty/common-r22.12/tools/pre-commit b/3rdparty/common-r22.12/tools/pre-commit deleted file mode 100644 index 5e8ba370716b1981e96ee2c45165326bbba3a1e3..0000000000000000000000000000000000000000 --- a/3rdparty/common-r22.12/tools/pre-commit +++ /dev/null @@ -1,56 +0,0 @@ -#!/bin/bash -# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -############################################################################### -# -# Git pre-commit hook for Triton related projects -# -# To install this hook for a project, copy "pre-commit" and "format.py" into -# ".git/hooks/" directory of the project -# -############################################################################### - -############################################################################### -# -# Run formatter script -# -############################################################################### - -# Repo root -GIT_REPO_ROOT=$(git rev-parse --show-toplevel) - -PYTHON_CMD=python3 -FORMATTER_PY=${GIT_REPO_ROOT}/.git/hooks/format.py - -CHANGED_FILES="$(git --no-pager diff --name-status --no-color --cached | awk '{ if (match($1, /R[0-9]+/)) { print $3 } else if ($1 != "D") { print $2 } }')" - -echo "Running Python auto-format..." 
-for CHANGED_FILE in $CHANGED_FILES; -do - ${PYTHON_CMD} ${FORMATTER_PY} ${GIT_REPO_ROOT}/${CHANGED_FILE} - git add ${GIT_REPO_ROOT}/${CHANGED_FILE} -done diff --git a/3rdparty/core-r22.12/.clang-format b/3rdparty/core-r22.12/.clang-format deleted file mode 100644 index 98c649734c29e0b1d134dae65be9bc08a14b4ba5..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/.clang-format +++ /dev/null @@ -1,37 +0,0 @@ ---- -BasedOnStyle: Google - -IndentWidth: 2 -ContinuationIndentWidth: 4 -UseTab: Never -MaxEmptyLinesToKeep: 2 - -SortIncludes: true -CompactNamespaces: true -ReflowComments: true - -DerivePointerAlignment: false -PointerAlignment: Left - -AllowShortIfStatementsOnASingleLine: false -AllowShortBlocksOnASingleLine: false -AllowShortFunctionsOnASingleLine: Inline - -AlwaysBreakAfterReturnType: TopLevelDefinitions -AlignAfterOpenBracket: AlwaysBreak -BreakBeforeBraces: Custom -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterNamespace: false - AfterStruct: false - AfterUnion: false - BeforeCatch: true - -BinPackArguments: true -BinPackParameters: true -ConstructorInitializerAllOnOneLineOrOnePerLine: false - -IndentCaseLabels: true \ No newline at end of file diff --git a/3rdparty/core-r22.12/.gitignore b/3rdparty/core-r22.12/.gitignore deleted file mode 100644 index 0e9f099a2eef4742716637e3cce3a45f7053b021..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -/build -/.vscode -*.so diff --git a/3rdparty/core-r22.12/CMakeLists.txt b/3rdparty/core-r22.12/CMakeLists.txt deleted file mode 100644 index 617c3d7c5509495b7bd8c9f9f223df7902fb3a1f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/CMakeLists.txt +++ /dev/null @@ -1,373 +0,0 @@ -# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#cmake_minimum_required(VERSION 3.18) -cmake_minimum_required(VERSION 3.16) -project(tritoncore LANGUAGES C CXX) - -# Control building of shared library vs. only headers and stub. 
By -# default only the headers and library stub is built. Set -# TRITON_CORE_HEADERS_ONLY=OFF to also build libtritonserver.so. -option(TRITON_CORE_HEADERS_ONLY "Build only headers and stub" ON) - - -# -# Triton Server API -# -add_library( - triton-core-serverapi INTERFACE -) - -add_library( - TritonCore::triton-core-serverapi ALIAS triton-core-serverapi -) - -target_include_directories( - triton-core-serverapi - INTERFACE - $ - $ -) - -# -# Triton Backend API -# -add_library( - triton-core-backendapi INTERFACE -) - -add_library( - TritonCore::triton-core-backendapi ALIAS triton-core-backendapi -) - -target_include_directories( - triton-core-backendapi - INTERFACE - $ - $ -) - -# -# Triton RepoAgent API -# -add_library( - triton-core-repoagentapi INTERFACE -) - -add_library( - TritonCore::triton-core-repoagentapi ALIAS triton-core-repoagentapi -) - -target_include_directories( - triton-core-repoagentapi - INTERFACE - $ - $ -) - -# -# Stub library for libtritonserver.so that stubs Triton Server API and -# Triton Backend API -# -add_library( - triton-core-serverstub SHARED - ${CMAKE_CURRENT_SOURCE_DIR}/src/tritonserver_stub.cc -) - -add_library( - TritonCore::triton-core-serverstub ALIAS triton-core-serverstub -) - -target_compile_features(triton-core-serverstub PRIVATE cxx_std_11) -if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - message("Using MSVC as compiler, default target on Windows 10. " - "If the target system is not Windows 10, please update _WIN32_WINNT " - "to corresponding value.") - target_compile_options( - triton-core-serverstub - PRIVATE - /Wall /D_WIN32_WINNT=0x0A00 /EHsc - ) -else() - target_compile_options( - triton-core-serverstub - PRIVATE - -Wall -Wextra -Wno-unused-parameter -Werror - ) -endif() - -set_target_properties( - triton-core-serverstub - PROPERTIES - POSITION_INDEPENDENT_CODE ON - OUTPUT_NAME tritonserver -) - -# -# Shared library implementing Triton Server API -# -if(NOT TRITON_CORE_HEADERS_ONLY) - include(CMakeDependentOption) - - set(TRITON_VERSION "0.0.0" CACHE STRING "The version of the Triton shared library" ) - - option(TRITON_ENABLE_LOGGING "Include logging support in server" ON) - option(TRITON_ENABLE_STATS "Include statistics collections in server" ON) - option(TRITON_ENABLE_TRACING "Include tracing support in server" OFF) - option(TRITON_ENABLE_NVTX "Include NVTX support in server" OFF) - option(TRITON_ENABLE_GPU "Enable GPU support in server" ON) - option(TRITON_ENABLE_MALI_GPU "Enable Arm Mali GPU support in server" OFF) - set(TRITON_MIN_COMPUTE_CAPABILITY "6.0" CACHE STRING - "The minimum CUDA compute capability supported by Triton" ) - set(TRITON_EXTRA_LIB_PATHS "" CACHE PATH "Extra library paths for Triton Server build") - - # Ensemble - option(TRITON_ENABLE_ENSEMBLE "Include ensemble support in server" OFF) - - # Metrics - option(TRITON_ENABLE_METRICS "Include metrics support in server" ON) - option(TRITON_ENABLE_METRICS_GPU "Include GPU metrics support in server" ON) - option(TRITON_ENABLE_METRICS_CPU "Include CPU metrics support in server" ON) - - # Cloud storage - option(TRITON_ENABLE_GCS "Include GCS Filesystem support in server" OFF) - option(TRITON_ENABLE_S3 "Include S3 Filesystem support in server" OFF) - option(TRITON_ENABLE_AZURE_STORAGE "Include Azure Storage Filesystem support in server" OFF) - - # Repo tags - set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") - set(TRITON_THIRD_PARTY_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/third_party repo") - - # Third-party location - 
set(TRITON_THIRD_PARTY_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party" CACHE STRING "Location of third-party build") - set(TRITON_THIRD_PARTY_SRC_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/third-party-src" CACHE STRING "Location of third-party source") - - if(TRITON_ENABLE_METRICS AND NOT TRITON_ENABLE_STATS) - message(FATAL_ERROR "TRITON_ENABLE_METRICS=ON requires TRITON_ENABLE_STATS=ON") - endif() - - if(TRITON_ENABLE_TRACING AND NOT TRITON_ENABLE_STATS) - message(FATAL_ERROR "TRITON_ENABLE_TRACING=ON requires TRITON_ENABLE_STATS=ON") - endif() - - if (TRITON_ENABLE_METRICS_CPU AND NOT TRITON_ENABLE_METRICS) - message(FATAL_ERROR "TRITON_ENABLE_METRICS_CPU=ON requires TRITON_ENABLE_METRICS=ON") - endif() - - if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_METRICS) - message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_METRICS=ON") - endif() - - if (TRITON_ENABLE_METRICS_GPU AND NOT TRITON_ENABLE_GPU) - message(FATAL_ERROR "TRITON_ENABLE_METRICS_GPU=ON requires TRITON_ENABLE_GPU=ON") - endif() - - include(FetchContent) - FetchContent_Declare( - repo-third-party - GIT_REPOSITORY https://github.com/triton-inference-server/third_party.git - GIT_TAG ${TRITON_THIRD_PARTY_REPO_TAG} - ) - FetchContent_MakeAvailable(repo-third-party) - - # Need to use ExternalProject for our builds so that we can get the - # correct dependencies between Triton shared library components and - # the ExternalProject dependencies (found in the third_party repo) - include(ExternalProject) - - # If CMAKE_TOOLCHAIN_FILE is set, propagate that hint path to the external - # projects. - set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "") - if (CMAKE_TOOLCHAIN_FILE) - set(_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE "-DCMAKE_TOOLCHAIN_FILE:PATH=${CMAKE_TOOLCHAIN_FILE}") - endif() - - # If VCPKG_TARGET_TRIPLET is set, propagate that hint path to the external - # projects. - set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "") - if (VCPKG_TARGET_TRIPLET) - set(_CMAKE_ARGS_VCPKG_TARGET_TRIPLET "-DVCPKG_TARGET_TRIPLET:STRING=${VCPKG_TARGET_TRIPLET}") - endif() - - # If OPENSSL_ROOT_DIR is set, propagate that hint path to the external - # projects with OpenSSL dependency. 
- set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "") - if (OPENSSL_ROOT_DIR) - set(_CMAKE_ARGS_OPENSSL_ROOT_DIR "-DOPENSSL_ROOT_DIR:PATH=${OPENSSL_ROOT_DIR}") - endif() - - # Location where protobuf-config.cmake will be installed varies by - # platform - if (WIN32) - set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/cmake") - else() - set(_FINDPACKAGE_PROTOBUF_CONFIG_DIR "${TRITON_THIRD_PARTY_INSTALL_PREFIX}/protobuf/lib/cmake/protobuf") - endif() - - if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set(TRITON_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install) - else() - set(TRITON_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) - endif() - - set(TRITON_DEPENDS googletest protobuf) - if(${TRITON_ENABLE_GCS}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} google-cloud-cpp) - endif() # TRITON_ENABLE_GCS - if(${TRITON_ENABLE_S3}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} aws-sdk-cpp) - endif() # TRITON_ENABLE_S3 - if(${TRITON_ENABLE_AZURE_STORAGE}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} azure-storage-cpplite) - endif() # TRITON_ENABLE_AZURE_STORAGE - if(${TRITON_ENABLE_METRICS}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} prometheus-cpp) - endif() # TRITON_ENABLE_METRICS - if(${TRITON_ENABLE_GPU}) - set(TRITON_DEPENDS ${TRITON_DEPENDS} cnmem) - endif() # TRITON_ENABLE_GPU - - ExternalProject_Add(triton-core - PREFIX triton-core - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src" - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/triton-core" - CMAKE_CACHE_ARGS - -DProtobuf_DIR:PATH=${_FINDPACKAGE_PROTOBUF_CONFIG_DIR} - ${_CMAKE_ARGS_OPENSSL_ROOT_DIR} - ${_CMAKE_ARGS_CMAKE_TOOLCHAIN_FILE} - ${_CMAKE_ARGS_VCPKG_TARGET_TRIPLET} - -DGTEST_ROOT:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/googletest - -DgRPC_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/grpc/lib/cmake/grpc - -Dc-ares_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/c-ares/lib/cmake/c-ares - -Dabsl_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/absl/lib/cmake/absl - -Dnlohmann_json_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/nlohmann_json/lib/cmake/nlohmann_json - -Dprometheus-cpp_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/prometheus-cpp/lib/cmake/prometheus-cpp - -Dgoogle_cloud_cpp_storage_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_storage - -Dgoogle_cloud_cpp_rest_internal_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_rest_internal - -Dazure-storage-cpplite_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/azure-storage-cpplite - -Dgoogle_cloud_cpp_common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/google-cloud-cpp/lib/cmake/google_cloud_cpp_common - -DCrc32c_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/crc32c/lib/cmake/Crc32c - -DAWSSDK_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/AWSSDK - -Daws-cpp-sdk-core_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/aws-cpp-sdk-core - -Daws-cpp-sdk-s3_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/cmake/aws-cpp-sdk-s3 - -Daws-c-event-stream_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-c-event-stream/cmake - -Daws-c-common_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-c-common/cmake - -Daws-checksums_DIR:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/aws-sdk-cpp/lib/aws-checksums/cmake - -DCNMEM_PATH:PATH=${TRITON_THIRD_PARTY_INSTALL_PREFIX}/cnmem - -DTRITON_COMMON_REPO_TAG:STRING=${TRITON_COMMON_REPO_TAG} - -DTRITON_EXTRA_LIB_PATHS:PATH=${TRITON_EXTRA_LIB_PATHS} - -DTRITON_ENABLE_NVTX:BOOL=${TRITON_ENABLE_NVTX} - 
-DTRITON_ENABLE_TRACING:BOOL=${TRITON_ENABLE_TRACING} - -DTRITON_ENABLE_LOGGING:BOOL=${TRITON_ENABLE_LOGGING} - -DTRITON_ENABLE_STATS:BOOL=${TRITON_ENABLE_STATS} - -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU} - -DTRITON_ENABLE_MALI_GPU:BOOL=${TRITON_ENABLE_MALI_GPU} - -DTRITON_MIN_COMPUTE_CAPABILITY:STRING=${TRITON_MIN_COMPUTE_CAPABILITY} - -DTRITON_ENABLE_METRICS:BOOL=${TRITON_ENABLE_METRICS} - -DTRITON_ENABLE_METRICS_GPU:BOOL=${TRITON_ENABLE_METRICS_GPU} - -DTRITON_ENABLE_METRICS_CPU:BOOL=${TRITON_ENABLE_METRICS_CPU} - -DTRITON_ENABLE_GCS:BOOL=${TRITON_ENABLE_GCS} - -DTRITON_ENABLE_AZURE_STORAGE:BOOL=${TRITON_ENABLE_AZURE_STORAGE} - -DTRITON_ENABLE_S3:BOOL=${TRITON_ENABLE_S3} - -DTRITON_ENABLE_ENSEMBLE:BOOL=${TRITON_ENABLE_ENSEMBLE} - -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} - -DCMAKE_INSTALL_PREFIX:PATH=${TRITON_INSTALL_PREFIX} - -DTRITON_VERSION:STRING=${TRITON_VERSION} - DEPENDS ${TRITON_DEPENDS} - ) -endif() # NOT TRITON_CORE_HEADERS_ONLY - -# -# Install -# -include(GNUInstallDirs) -set(INSTALL_CONFIGDIR ${CMAKE_INSTALL_LIBDIR}/cmake/TritonCore) - -install( - TARGETS - triton-core-backendapi - triton-core-repoagentapi - triton-core-serverapi - EXPORT - triton-core-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) - -install( - TARGETS - triton-core-serverstub - EXPORT - triton-core-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs - RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/stubs -) - -install( - DIRECTORY include/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} -) - -install( - EXPORT - triton-core-targets - FILE - TritonCoreTargets.cmake - NAMESPACE - TritonCore:: - DESTINATION - ${INSTALL_CONFIGDIR} -) - -include(CMakePackageConfigHelpers) -configure_package_config_file( - ${CMAKE_CURRENT_LIST_DIR}/cmake/TritonCoreConfig.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/TritonCoreConfig.cmake - INSTALL_DESTINATION ${INSTALL_CONFIGDIR} -) - -install( - FILES - ${CMAKE_CURRENT_BINARY_DIR}/TritonCoreConfig.cmake - DESTINATION - ${INSTALL_CONFIGDIR} -) - -# -# Export from build tree -# -export( - EXPORT - triton-core-targets - FILE - ${CMAKE_CURRENT_BINARY_DIR}/TritonCoreTargets.cmake - NAMESPACE - TritonCore:: -) - -export(PACKAGE TritonCore) diff --git a/3rdparty/core-r22.12/LICENSE b/3rdparty/core-r22.12/LICENSE deleted file mode 100644 index 237621c146a3b2f3b43a26eba937393b8e1a6f0c..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of NVIDIA CORPORATION nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/3rdparty/core-r22.12/README.md b/3rdparty/core-r22.12/README.md deleted file mode 100644 index 457844d1307c05713a5f6a957ea7a77c47312ec2..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/README.md +++ /dev/null @@ -1,104 +0,0 @@ - - -[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) - -# Triton Inference Server Core - -This repository holds the source code and headers for the library that -implements the core functionality of Triton. The *core* library can be -built as described below and used directly via its [C -API](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/inference_protocols.md#in-process-triton-server-api). To -be useful the core library must be paired with one or more backends. -You can learn more about backends in the [backend -repo](https://github.com/triton-inference-server/backend). - -Typically you do not build or use the core library on its own, but as -part of the *tritonserver* executable. The *tritonserver* executable -is built in the [server -repo](https://github.com/triton-inference-server/server) as described -in the [server build -documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md). - -Ask questions or report problems in the main Triton [issues -page](https://github.com/triton-inference-server/server/issues). - -## Build the Triton Core Library - -Before building the Triton core library, your build system must -install the required dependencies described in the [build -documentation](https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md). For -example, if you are building the core library with GPU support -(-DTRITON_ENABLE_GPU=ON), then you must install the CUDA, cuDNN, and -TensorRT dependencies required for the version of Triton you are -building. - -To build, first clone the release branch matching the Triton release -you are interest in (*rxx.yy*), or the *main* branch to build the -top-of-tree. The Triton core library is built with CMake. - -``` -$ mkdir build -$ cd build -$ cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DTRITON_CORE_HEADERS_ONLY=OFF .. -$ make install -``` - -When the build completes, the install directory will contain the -Triton core shared library (install/lib/libtritonserver.so on Linux, -install/bin/tritonserver.dll on Windows), and the core library headers -files in install/include/triton/core. - -### Build a Release Branch - -The following required Triton repositories will be pulled and used in -the build. By default the "main" branch/tag will be used for each repo -but the listed CMake argument can be used to override. 
- -* triton-inference-server/third_party: -DTRITON_THIRD_PARTY_REPO_TAG=[tag] -* triton-inference-server/common: -DTRITON_COMMON_REPO_TAG=[tag] - -You will need to override if you are building from a release -branch. For example, if you are building the r22.03 version of Triton, -you would clone the r22.03 branch of the core repo and use the -following cmake command. - -``` -$ cmake -DTRITON_THIRD_PARTY_REPO_TAG=r22.03 -DTRITON_COMMON_REPO_TAG=r22.03 -DTRITON_CORE_HEADERS_ONLY=OFF .. -``` - -### Build Options - -The [CMakeLists.txt](CMakeLists.txt) file contains the options -available when build the core library. For example, to build the core -library with the default settings plus S3 cloud storage and ensembling -support use the following command. - -``` -$ cmake -DTRITON_CORE_HEADERS_ONLY=OFF -DTRITON_ENABLE_S3=ON -DTRITON_ENABLE_ENSEMBLE=ON .. -``` diff --git a/3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in b/3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in deleted file mode 100644 index 05ba9db1845980877d8814171c2d8fad6fc61a08..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/cmake/TritonCoreConfig.cmake.in +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -include(CMakeFindDependencyMacro) - -get_filename_component( - TRITONCORE_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH -) - -list(APPEND CMAKE_MODULE_PATH ${TRITONCORE_CMAKE_DIR}) - -if(NOT TARGET TritonCore::triton-core-serverapi) - include("${TRITONCORE_CMAKE_DIR}/TritonCoreTargets.cmake") -endif() diff --git a/3rdparty/core-r22.12/include/triton/core/tritonbackend.h b/3rdparty/core-r22.12/include/triton/core/tritonbackend.h deleted file mode 100644 index 9d800183abbf5511d61a10e036d1d6142cbc0625..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/include/triton/core/tritonbackend.h +++ /dev/null @@ -1,1410 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include "triton/core/tritonserver.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _COMPILING_TRITONBACKEND -#if defined(_MSC_VER) -#define TRITONBACKEND_DECLSPEC __declspec(dllexport) -#define TRITONBACKEND_ISPEC __declspec(dllimport) -#elif defined(__GNUC__) -#define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default"))) -#define TRITONBACKEND_ISPEC -#else -#define TRITONBACKEND_DECLSPEC -#define TRITONBACKEND_ISPEC -#endif -#else -#if defined(_MSC_VER) -#define TRITONBACKEND_DECLSPEC __declspec(dllimport) -#define TRITONBACKEND_ISPEC __declspec(dllexport) -#else -#define TRITONBACKEND_DECLSPEC -#define TRITONBACKEND_ISPEC -#endif -#endif - -struct TRITONBACKEND_MemoryManager; -struct TRITONBACKEND_Input; -struct TRITONBACKEND_Output; -struct TRITONBACKEND_State; -struct TRITONBACKEND_Request; -struct TRITONBACKEND_ResponseFactory; -struct TRITONBACKEND_Response; -struct TRITONBACKEND_Backend; -struct TRITONBACKEND_Model; -struct TRITONBACKEND_ModelInstance; -struct TRITONBACKEND_BackendAttribute; - -/// -/// TRITONBACKEND API Version -/// -/// The TRITONBACKEND API is versioned with major and minor version -/// numbers. Any change to the API that does not impact backwards -/// compatibility (for example, adding a non-required function) -/// increases the minor version number. Any change that breaks -/// backwards compatibility (for example, deleting or changing the -/// behavior of a function) increases the major version number. A -/// backend should check that the API version used to compile the -/// backend is compatible with the API version of the Triton server -/// that it is running in. This is typically done by code similar to -/// the following which makes sure that the major versions are equal -/// and that the minor version of Triton is >= the minor version used -/// to build the backend. 
-/// -/// uint32_t api_version_major, api_version_minor; -/// TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor); -/// if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) || -/// (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) { -/// return TRITONSERVER_ErrorNew( -/// TRITONSERVER_ERROR_UNSUPPORTED, -/// "triton backend API version does not support this backend"); -/// } -/// -#define TRITONBACKEND_API_VERSION_MAJOR 1 -#define TRITONBACKEND_API_VERSION_MINOR 10 - -/// Get the TRITONBACKEND API version supported by Triton. This value -/// can be compared against the TRITONBACKEND_API_VERSION_MAJOR and -/// TRITONBACKEND_API_VERSION_MINOR used to build the backend to -/// ensure that Triton is compatible with the backend. -/// -/// \param major Returns the TRITONBACKEND API major version supported -/// by Triton. -/// \param minor Returns the TRITONBACKEND API minor version supported -/// by Triton. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ApiVersion( - uint32_t* major, uint32_t* minor); - -/// TRITONBACKEND_ArtifactType -/// -/// The ways that the files that make up a backend or model are -/// communicated to the backend. -/// -/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend -/// artifacts are made available to Triton via a locally -/// accessible filesystem. The backend can access these files -/// using an appropriate system API. -/// -typedef enum TRITONBACKEND_artifacttype_enum { - TRITONBACKEND_ARTIFACT_FILESYSTEM -} TRITONBACKEND_ArtifactType; - - -/// -/// TRITONBACKEND_MemoryManager -/// -/// Object representing an memory manager that is capable of -/// allocating and otherwise managing different memory types. For -/// improved performance Triton maintains pools for GPU and CPU-pinned -/// memory and the memory manager allows backends to access those -/// pools. -/// - -/// Allocate a contiguous block of memory of a specific type using a -/// memory manager. Two error codes have specific interpretations for -/// this function: -/// -/// TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is -/// incapable of allocating the requested memory type and memory -/// type ID. Requests for the memory type and ID will always fail -/// no matter 'byte_size' of the request. -/// -/// TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can -/// allocate the memory type and ID but that currently it cannot -/// allocate a contiguous block of memory of the requested -/// 'byte_size'. -/// -/// \param manager The memory manager. -/// \param buffer Returns the allocated memory. -/// \param memory_type The type of memory to allocate. -/// \param memory_type_id The ID associated with the memory type to -/// allocate. For GPU memory this indicates the device ID of the GPU -/// to allocate from. -/// \param byte_size The size of memory to allocate, in bytes. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_MemoryManagerAllocate( - TRITONBACKEND_MemoryManager* manager, void** buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id, - const uint64_t byte_size); - -/// Free a buffer that was previously allocated with -/// TRITONBACKEND_MemoryManagerAllocate. The call must provide the -/// same values for 'memory_type' and 'memory_type_id' as were used -/// when the buffer was allocate or else the behavior is undefined. -/// -/// \param manager The memory manager. 
-/// \param buffer The allocated memory buffer to free. -/// \param memory_type The type of memory of the buffer. -/// \param memory_type_id The ID associated with the memory type of -/// the buffer. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_MemoryManagerFree( - TRITONBACKEND_MemoryManager* manager, void* buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id); - -/// -/// TRITONBACKEND_Input -/// -/// Object representing an input tensor. -/// - -/// Get the name and properties of an input tensor. The returned -/// strings and other properties are owned by the input, not the -/// caller, and so should not be modified or freed. -/// -/// \param input The input tensor. -/// \param name If non-nullptr, returns the tensor name. -/// \param datatype If non-nullptr, returns the tensor datatype. -/// \param shape If non-nullptr, returns the tensor shape. -/// \param dim_count If non-nullptr, returns the number of dimensions -/// in the tensor shape. -/// \param byte_size If non-nullptr, returns the size of the available -/// data for the tensor, in bytes. This size reflects the actual data -/// available, and does not necessarily match what is -/// expected/required for the tensor given its shape and datatype. It -/// is the responsibility of the backend to handle mismatches in these -/// sizes appropriately. -/// \param buffer_count If non-nullptr, returns the number of buffers -/// holding the contents of the tensor. These buffers are accessed -/// using TRITONBACKEND_InputBuffer. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputProperties( - TRITONBACKEND_Input* input, const char** name, - TRITONSERVER_DataType* datatype, const int64_t** shape, - uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count); - -/// Get the name and properties of an input tensor associated with a given -/// host policy. If there are no input buffers for the specified host policy, -/// the properties of the fallback input buffers are returned. The returned -/// strings and other properties are owned by the input, not the caller, and so -/// should not be modified or freed. -/// -/// \param input The input tensor. -/// \param host_policy_name The host policy name. Fallback input properties -/// will be return if nullptr is provided. -/// \param name If non-nullptr, returns the tensor name. -/// \param datatype If non-nullptr, returns the tensor datatype. -/// \param shape If non-nullptr, returns the tensor shape. -/// \param dim_count If non-nullptr, returns the number of dimensions -/// in the tensor shape. -/// \param byte_size If non-nullptr, returns the size of the available -/// data for the tensor, in bytes. This size reflects the actual data -/// available, and does not necessarily match what is -/// expected/required for the tensor given its shape and datatype. It -/// is the responsibility of the backend to handle mismatches in these -/// sizes appropriately. -/// \param buffer_count If non-nullptr, returns the number of buffers -/// holding the contents of the tensor. These buffers are accessed -/// using TRITONBACKEND_InputBufferForHostPolicy. -/// \return a TRITONSERVER_Error indicating success or failure. 
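The property query above is usually paired with the buffer accessors declared just below: a backend reads the properties once, then iterates over `buffer_count` buffers that remain owned by the input. A minimal, hypothetical usage sketch (not part of the deleted header), assuming errors are simply propagated to the caller:

```c
#include <stdint.h>
#include "triton/core/tritonbackend.h"

/* Hypothetical helper, not from the header: walks every buffer of one input,
 * preferring CPU memory, and propagates any API error to the caller. */
static TRITONSERVER_Error*
ReadAllInputBuffers(TRITONBACKEND_Input* input)
{
  const char* name;
  TRITONSERVER_DataType datatype;
  const int64_t* shape;
  uint32_t dims_count, buffer_count;
  uint64_t byte_size;

  TRITONSERVER_Error* err = TRITONBACKEND_InputProperties(
      input, &name, &datatype, &shape, &dims_count, &byte_size, &buffer_count);
  if (err != NULL) {
    return err;
  }

  for (uint32_t i = 0; i < buffer_count; ++i) {
    const void* buffer;
    uint64_t buffer_byte_size;
    /* In/out: a preference on input, the actual type/id of 'buffer' on return. */
    TRITONSERVER_MemoryType memory_type = TRITONSERVER_MEMORY_CPU;
    int64_t memory_type_id = 0;
    err = TRITONBACKEND_InputBuffer(
        input, i, &buffer, &buffer_byte_size, &memory_type, &memory_type_id);
    if (err != NULL) {
      return err;
    }
    /* Consume 'buffer' here; it stays valid only until the request is released. */
  }
  return NULL; /* success */
}
```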
-TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input* input, const char* host_policy_name, const char** name, - TRITONSERVER_DataType* datatype, const int64_t** shape, - uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count); - -/// Get a buffer holding (part of) the tensor data for an input. For a -/// given input the number of buffers composing the input are found -/// from 'buffer_count' returned by TRITONBACKEND_InputProperties. The -/// returned buffer is owned by the input and so should not be -/// modified or freed by the caller. The lifetime of the buffer -/// matches that of the input and so the buffer should not be accessed -/// after the input tensor object is released. -/// -/// \param input The input tensor. -/// \param index The index of the buffer. Must be 0 <= index < -/// buffer_count, where buffer_count is the value returned by -/// TRITONBACKEND_InputProperties. -/// \param buffer Returns a pointer to a contiguous block of data for -/// the named input. -/// \param buffer_byte_size Returns the size, in bytes, of 'buffer'. -/// \param memory_type Acts as both input and output. On input gives -/// the buffer memory type preferred by the function caller. Returns -/// the actual memory type of 'buffer'. -/// \param memory_type_id Acts as both input and output. On input -/// gives the buffer memory type id preferred by the function caller. -/// Returns the actual memory type id of 'buffer'. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputBuffer( - TRITONBACKEND_Input* input, const uint32_t index, const void** buffer, - uint64_t* buffer_byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id); - -/// Get a buffer holding (part of) the tensor data for an input for a specific -/// host policy. If there are no input buffers specified for this host policy, -/// the fallback input buffer is returned. -/// For a given input the number of buffers composing the input are found -/// from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy. -/// The returned buffer is owned by the input and so should not be modified or -/// freed by the caller. The lifetime of the buffer matches that of the input -/// and so the buffer should not be accessed after the input tensor object is -/// released. -/// -/// \param input The input tensor. -/// \param host_policy_name The host policy name. Fallback input buffer -/// will be return if nullptr is provided. -/// \param index The index of the buffer. Must be 0 <= index < -/// buffer_count, where buffer_count is the value returned by -/// TRITONBACKEND_InputPropertiesForHostPolicy. -/// \param buffer Returns a pointer to a contiguous block of data for -/// the named input. -/// \param buffer_byte_size Returns the size, in bytes, of 'buffer'. -/// \param memory_type Acts as both input and output. On input gives -/// the buffer memory type preferred by the function caller. Returns -/// the actual memory type of 'buffer'. -/// \param memory_type_id Acts as both input and output. On input -/// gives the buffer memory type id preferred by the function caller. -/// Returns the actual memory type id of 'buffer'. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input* input, const char* host_policy_name, - const uint32_t index, const void** buffer, uint64_t* buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id); - -/// Get the buffer attributes associated with the given input buffer. For a -/// given input the number of buffers composing the input are found from -/// 'buffer_count' returned by TRITONBACKEND_InputProperties. The returned -/// 'buffer_attributes' is owned by the input and so should not be modified or -/// freed by the caller. The lifetime of the 'buffer_attributes' matches that of -/// the input and so the 'buffer_attributes' should not be accessed after the -/// input tensor object is released. -/// -/// \param input The input tensor. -/// \param index The index of the buffer. Must be 0 <= index < buffer_count, -/// where buffer_count is the value returned by TRITONBACKEND_InputProperties. -/// \param buffer Returns a pointer to a contiguous block of data for -/// the named input. -/// \param buffer_attributes Returns the attributes for the given buffer. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_InputBufferAttributes( - TRITONBACKEND_Input* input, const uint32_t index, const void** buffer, - TRITONSERVER_BufferAttributes** buffer_attributes); - -/// -/// TRITONBACKEND_Output -/// -/// Object representing a response output tensor. -/// - -/// Get a buffer to use to hold the tensor data for the output. The -/// returned buffer is owned by the output and so should not be freed -/// by the caller. The caller can and should fill the buffer with the -/// output data for the tensor. The lifetime of the buffer matches -/// that of the output and so the buffer should not be accessed after -/// the output tensor object is released. -/// -/// \param buffer Returns a pointer to a buffer where the contents of -/// the output tensor should be placed. -/// \param buffer_byte_size The size, in bytes, of the buffer required -/// by the caller. -/// \param memory_type Acts as both input and output. On input gives -/// the buffer memory type preferred by the caller. Returns the -/// actual memory type of 'buffer'. -/// \param memory_type_id Acts as both input and output. On input -/// gives the buffer memory type id preferred by the caller. Returns -/// the actual memory type id of 'buffer'. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_OutputBuffer( - TRITONBACKEND_Output* output, void** buffer, - const uint64_t buffer_byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id); - -/// Get the buffer attributes associated with the given output buffer. The -/// returned 'buffer_attributes' is owned by the output and so should not be -/// modified or freed by the caller. The lifetime of the 'buffer_attributes' -/// matches that of the output and so the 'buffer_attributes' should not be -/// accessed after the output tensor object is released. This function must be -/// called after the TRITONBACKEND_OutputBuffer otherwise it might contain -/// incorrect data. -/// -/// \param output The output tensor. -/// \param buffer_attributes Returns the attributes for the output buffer. -/// \return a TRITONSERVER_Error indicating success or failure. 
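The output side mirrors the input side: the backend first declares the output on a response with TRITONBACKEND_ResponseOutput (documented further below), then asks for a buffer of the required size and fills it. A minimal sketch, assuming a hypothetical helper name, FP32 data, and that a plain CPU copy is acceptable:

```c
#include <string.h>
#include "triton/core/tritonbackend.h"

/* Hypothetical helper, not from the header: adds one FP32 output tensor to a
 * response and copies caller-provided data into the buffer Triton returns. */
static TRITONSERVER_Error*
AppendFloatOutput(
    TRITONBACKEND_Response* response, const char* name, const int64_t* shape,
    uint32_t dims_count, const float* data, uint64_t byte_size)
{
  TRITONBACKEND_Output* output;
  TRITONSERVER_Error* err = TRITONBACKEND_ResponseOutput(
      response, &output, name, TRITONSERVER_TYPE_FP32, shape, dims_count);
  if (err != NULL) {
    return err;
  }

  void* buffer;
  TRITONSERVER_MemoryType memory_type = TRITONSERVER_MEMORY_CPU; /* preference */
  int64_t memory_type_id = 0;
  err = TRITONBACKEND_OutputBuffer(
      output, &buffer, byte_size, &memory_type, &memory_type_id);
  if (err != NULL) {
    return err;
  }

  /* A real backend must honor the returned memory_type: a GPU buffer needs
   * cudaMemcpy (or similar); plain memcpy is only valid for CPU memory. */
  if ((memory_type == TRITONSERVER_MEMORY_CPU) ||
      (memory_type == TRITONSERVER_MEMORY_CPU_PINNED)) {
    memcpy(buffer, data, byte_size);
  }
  return NULL;
}
```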
-TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_OutputBufferAttributes( - TRITONBACKEND_Output* output, - TRITONSERVER_BufferAttributes** buffer_attributes); - -/// -/// TRITONBACKEND_Request -/// -/// Object representing an inference request. -/// - -/// Get the ID of the request. Can be nullptr if request doesn't have -/// an ID. The returned string is owned by the request, not the -/// caller, and so should not be modified or freed. -/// -/// \param request The inference request. -/// \param id Returns the ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestId( - TRITONBACKEND_Request* request, const char** id); - -/// Get the correlation ID of the request if it is an unsigned integer. -/// Zero indicates that the request does not have a correlation ID. -/// Returns failure if correlation ID for given request is not an unsigned -/// integer. -/// -/// \param request The inference request. -/// \param id Returns the correlation ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestCorrelationId( - TRITONBACKEND_Request* request, uint64_t* id); - -/// Get the correlation ID of the request if it is a string. -/// Empty string indicates that the request does not have a correlation ID. -/// Returns error if correlation ID for given request is not a string. -/// -/// \param request The inference request. -/// \param id Returns the correlation ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestCorrelationIdString( - TRITONBACKEND_Request* request, const char** id); - -/// Get the flag(s) associated with a request. On return 'flags' holds -/// a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for -/// available flags. -/// -/// \param request The inference request. -/// \param flags Returns the flags. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestFlags( - TRITONBACKEND_Request* request, uint32_t* flags); - -/// Get the number of input tensors specified in the request. -/// -/// \param request The inference request. -/// \param count Returns the number of input tensors. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputCount( - TRITONBACKEND_Request* request, uint32_t* count); - -/// Get the name of an input tensor. The caller does not own -/// the returned string and must not modify or delete it. The lifetime -/// of the returned string extends only as long as 'request'. -/// -/// \param request The inference request. -/// \param index The index of the input tensor. Must be 0 <= index < -/// count, where count is the value returned by -/// TRITONBACKEND_RequestInputCount. -/// \param input_name Returns the name of the input tensor -/// corresponding to the index. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputName( - TRITONBACKEND_Request* request, const uint32_t index, - const char** input_name); - -/// Get a named request input. The lifetime of the returned input -/// object matches that of the request and so the input object should -/// not be accessed after the request object is released. -/// -/// \param request The inference request. -/// \param name The name of the input. 
-/// \param input Returns the input corresponding to the name. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInput( - TRITONBACKEND_Request* request, const char* name, - TRITONBACKEND_Input** input); - -/// Get a request input by index. The order of inputs in a given -/// request is not necessarily consistent with other requests, even if -/// the requests are in the same batch. As a result, you can not -/// assume that an index obtained from one request will point to the -/// same input in a different request. -/// -/// The lifetime of the returned input object matches that of the -/// request and so the input object should not be accessed after the -/// request object is released. -/// -/// \param request The inference request. -/// \param index The index of the input tensor. Must be 0 <= index < -/// count, where count is the value returned by -/// TRITONBACKEND_RequestInputCount. -/// \param input Returns the input corresponding to the index. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestInputByIndex( - TRITONBACKEND_Request* request, const uint32_t index, - TRITONBACKEND_Input** input); - -/// Get the number of output tensors requested to be returned in the -/// request. -/// -/// \param request The inference request. -/// \param count Returns the number of output tensors. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestOutputCount( - TRITONBACKEND_Request* request, uint32_t* count); - -/// Get the name of a requested output tensor. The caller does not own -/// the returned string and must not modify or delete it. The lifetime -/// of the returned string extends only as long as 'request'. -/// -/// \param request The inference request. -/// \param index The index of the requested output tensor. Must be 0 -/// <= index < count, where count is the value returned by -/// TRITONBACKEND_RequestOutputCount. -/// \param output_name Returns the name of the requested output tensor -/// corresponding to the index. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestOutputName( - TRITONBACKEND_Request* request, const uint32_t index, - const char** output_name); - -/// Returns the preferred memory type and memory type ID of the output buffer -/// for the request. As much as possible, Triton will attempt to return -/// the same memory_type and memory_type_id values that will be returned by -/// the subsequent call to TRITONBACKEND_OutputBuffer, however, the backend must -/// be capable of handling cases where the values differ. -/// -/// \param request The request. -/// \param name The name of the output tensor. This is optional -/// and it should be set to nullptr to indicate that the tensor name has -/// not determined. -/// \param byte_size The expected size of the buffer. This is optional -/// and it should be set to nullptr to indicate that the byte size has -/// not determined. -/// \param memory_type Acts as both input and output. On input gives -/// the memory type preferred by the caller. Returns memory type preferred -/// by Triton, taken account of the caller preferred type. -/// \param memory_type_id Acts as both input and output. On input gives -/// the memory type ID preferred by the caller. 
Returns memory type ID preferred -/// by Triton, taken account of the caller preferred type ID. -/// \return a TRITONSERVER_Error object if a failure occurs. -/// A TRITONSERVER_ERROR_UNAVAILABLE error indicates that the properties are not -/// available, other error codes indicate an error. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestOutputBufferProperties( - TRITONBACKEND_Request* request, const char* name, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id); - -/// Release the request. The request should be released when it is no -/// longer needed by the backend. If this call returns with an error -/// (i.e. non-nullptr) then the request was not released and ownership -/// remains with the backend. If this call returns with success, the -/// 'request' object is no longer owned by the backend and must not be -/// used. Any tensor names, data types, shapes, input tensors, -/// etc. returned by TRITONBACKEND_Request* functions for this request -/// are no longer valid. If a persistent copy of that data is required -/// it must be created before calling this function. -/// -/// \param request The inference request. -/// \param release_flags Flags indicating what type of request release -/// should be performed. \see TRITONSERVER_RequestReleaseFlag. \see -/// TRITONSERVER_InferenceRequestReleaseFn_t. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_RequestRelease( - TRITONBACKEND_Request* request, uint32_t release_flags); - -/// -/// TRITONBACKEND_ResponseFactory -/// -/// Object representing an inference response factory. Using a -/// response factory is not required; instead a response can be -/// generated directly from a TRITONBACKEND_Request object using -/// TRITONBACKEND_ResponseNew(). A response factory allows a request -/// to be released before all responses have been sent. Releasing a -/// request as early as possible releases all input tensor data and -/// therefore may be desirable in some cases. - -/// Create the response factory associated with a request. -/// -/// \param factory Returns the new response factory. -/// \param request The inference request. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryNew( - TRITONBACKEND_ResponseFactory** factory, TRITONBACKEND_Request* request); - -/// Destroy a response factory. -/// -/// \param factory The response factory. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseFactoryDelete( - TRITONBACKEND_ResponseFactory* factory); - -/// Send response flags without a corresponding response. -/// -/// \param factory The response factory. -/// \param send_flags Flags to send. \see -/// TRITONSERVER_ResponseCompleteFlag. \see -/// TRITONSERVER_InferenceResponseCompleteFn_t. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseFactorySendFlags( - TRITONBACKEND_ResponseFactory* factory, const uint32_t send_flags); - -/// -/// TRITONBACKEND_Response -/// -/// Object representing an inference response. For a given request, -/// the backend must carefully manage the lifecycle of responses -/// generated for that request to ensure that the output tensor -/// buffers are allocated correctly. 
When a response is created with -/// TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory, -/// all the outputs and corresponding buffers must be created for that -/// response using TRITONBACKEND_ResponseOutput and -/// TRITONBACKEND_OutputBuffer *before* another response is created -/// for the request. For a given response, outputs can be created in -/// any order but they must be created sequentially/sychronously (for -/// example, the backend cannot use multiple threads to simultaneously -/// add multiple outputs to a response). -/// -/// The above requirement applies only to responses being generated -/// for a given request. The backend may generate responses in -/// parallel on multiple threads as long as those responses are for -/// different requests. -/// -/// This order of response creation must be strictly followed. But, -/// once response(s) are created they do not need to be sent -/// immediately, nor do they need to be sent in the order they were -/// created. The backend may even delete a created response instead of -/// sending it by using TRITONBACKEND_ResponseDelete. - -/// Create a response for a request. -/// -/// \param response Returns the new response. -/// \param request The request. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseNew( - TRITONBACKEND_Response** response, TRITONBACKEND_Request* request); - -/// Create a response using a factory. -/// -/// \param response Returns the new response. -/// \param factory The response factory. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseNewFromFactory( - TRITONBACKEND_Response** response, TRITONBACKEND_ResponseFactory* factory); - -/// Destroy a response. It is not necessary to delete a response if -/// TRITONBACKEND_ResponseSend is called as that function transfers -/// ownership of the response object to Triton. -/// -/// \param response The response. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseDelete( - TRITONBACKEND_Response* response); - -/// Set a string parameter in the response. -/// -/// \param response The response. -/// \param name The name of the parameter. -/// \param value The value of the parameter. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseSetStringParameter( - TRITONBACKEND_Response* response, const char* name, const char* value); - -/// Set an integer parameter in the response. -/// -/// \param response The response. -/// \param name The name of the parameter. -/// \param value The value of the parameter. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseSetIntParameter( - TRITONBACKEND_Response* response, const char* name, const int64_t value); - -/// Set an boolean parameter in the response. -/// -/// \param response The response. -/// \param name The name of the parameter. -/// \param value The value of the parameter. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseSetBoolParameter( - TRITONBACKEND_Response* response, const char* name, const bool value); - -/// Create an output tensor in the response. 
The lifetime of the -/// returned output tensor object matches that of the response and so -/// the output tensor object should not be accessed after the response -/// object is deleted. -/// -/// \param response The response. -/// \param output Returns the new response output. -/// \param name The name of the output tensor. -/// \param datatype The datatype of the output tensor. -/// \param shape The shape of the output tensor. -/// \param dims_count The number of dimensions in the output tensor -/// shape. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response* response, TRITONBACKEND_Output** output, - const char* name, const TRITONSERVER_DataType datatype, - const int64_t* shape, const uint32_t dims_count); - -/// Send a response. Calling this function transfers ownership of the -/// response object to Triton. The caller must not access or delete -/// the response object after calling this function. -/// -/// \param response The response. -/// \param send_flags Flags associated with the response. \see -/// TRITONSERVER_ResponseCompleteFlag. \see -/// TRITONSERVER_InferenceResponseCompleteFn_t. -/// \param error The TRITONSERVER_Error to send if the response is an -/// error, or nullptr if the response is successful. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ResponseSend( - TRITONBACKEND_Response* response, const uint32_t send_flags, - TRITONSERVER_Error* error); - -/// -/// TRITONBACKEND_State -/// -/// Object representing a state. -/// - -/// Create a state in the request. The returned state object is only valid -/// before the TRITONBACKEND_StateUpdate is called. The state should not be -/// freed by the caller. If TRITONBACKEND_StateUpdate is not called, the -/// lifetime of the state matches the lifetime of the request. If the state name -/// does not exist in the "state" section of the model configuration, the state -/// will not be created and an error will be returned. If this function is -/// called when sequence batching is not enabled or there is no 'states' section -/// in the sequence batching section of the model configuration, this call will -/// return an error. -/// -/// \param state Returns the new state. -/// \param request The request. -/// \param name The name of the state. -/// \param datatype The datatype of the state. -/// \param shape The shape of the state. -/// \param dims_count The number of dimensions in the state shape. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateNew( - TRITONBACKEND_State** state, TRITONBACKEND_Request* request, - const char* name, const TRITONSERVER_DataType datatype, - const int64_t* shape, const uint32_t dims_count); - -/// Update the state for the sequence. Calling this function will replace the -/// state stored for this seqeunce in Triton with 'state' provided in the -/// function argument. If this function is called when sequence batching is not -/// enabled or there is no 'states' section in the sequence batching section of -/// the model configuration, this call will return an error. The backend is not -/// required to call this function. 
If the backend doesn't call -/// TRITONBACKEND_StateUpdate function, this particular state for the sequence -/// will not be updated and the next inference request in the sequence will use -/// the same state as the current inference request. -/// -/// \param state The state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateUpdate( - TRITONBACKEND_State* state); - -/// Get a buffer to use to hold the tensor data for the state. The returned -/// buffer is owned by the state and so should not be freed by the caller. The -/// caller can and should fill the buffer with the state data. The buffer must -/// not be accessed by the backend after TRITONBACKEND_StateUpdate is called. -/// The caller should fill the buffer before calling TRITONBACKEND_StateUpdate. -/// -/// \param state The state. -/// \param buffer Returns a pointer to a buffer where the contents of the state -/// should be placed. -/// \param buffer_byte_size The size, in bytes, of the buffer required -/// by the caller. -/// \param memory_type Acts as both input and output. On input gives -/// the buffer memory type preferred by the caller. Returns the -/// actual memory type of 'buffer'. -/// \param memory_type_id Acts as both input and output. On input -/// gives the buffer memory type id preferred by the caller. Returns -/// the actual memory type id of 'buffer'. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateBuffer( - TRITONBACKEND_State* state, void** buffer, const uint64_t buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id); - -/// Get the buffer attributes associated with the given state buffer. -/// The returned 'buffer_attributes' is owned by the state and so should not be -/// modified or freed by the caller. The lifetime of the 'buffer_attributes' -/// matches that of the state. -/// -/// \param state The state. -/// \param buffer_attributes Returns the buffer attributes for the given state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_StateBufferAttributes( - TRITONBACKEND_State* state, - TRITONSERVER_BufferAttributes** buffer_attributes); - -/// -/// TRITONBACKEND_Backend -/// -/// Object representing a backend. -/// - -/// TRITONBACKEND_ExecutionPolicy -/// -/// Types of execution policy that can be implemented by a backend. -/// -/// TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model -/// blocks in TRITONBACKEND_ModelInstanceExecute until it is ready -/// to handle another inference. Upon returning from -/// TRITONBACKEND_ModelInstanceExecute, Triton may immediately -/// call TRITONBACKEND_ModelInstanceExecute for the same instance -/// to execute a new batch of requests. Thus, most backends using -/// this policy will not return from -/// TRITONBACKEND_ModelInstanceExecute until all responses have -/// been sent and all requests have been released. This is the -/// default execution policy. -/// -/// TRITONBACKEND_EXECUTION_DEVICE_BLOCKING: An instance, A, of the -/// model blocks in TRITONBACKEND_ModelInstanceExecute if the -/// device associated with the instance is unable to handle -/// another inference. 
Even if another instance, B, associated -/// with the device, is available and ready to perform an -/// inference, Triton will not invoke -/// TRITONBACKEND_ModeInstanceExecute for B until A returns from -/// TRITONBACKEND_ModelInstanceExecute. Triton will not be blocked -/// from calling TRITONBACKEND_ModelInstanceExecute for instance -/// C, which is associated with a different device than A and B, -/// even if A or B has not returned from -/// TRITONBACKEND_ModelInstanceExecute. This execution policy is -/// typically used by a backend that can cooperatively execute -/// multiple model instances on the same device. -/// -typedef enum TRITONBACKEND_execpolicy_enum { - TRITONBACKEND_EXECUTION_BLOCKING, - TRITONBACKEND_EXECUTION_DEVICE_BLOCKING -} TRITONBACKEND_ExecutionPolicy; - -/// Get the name of the backend. The caller does not own the returned -/// string and must not modify or delete it. The lifetime of the -/// returned string extends only as long as 'backend'. -/// -/// \param backend The backend. -/// \param name Returns the name of the backend. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendName( - TRITONBACKEND_Backend* backend, const char** name); - -/// Get the backend configuration. The 'backend_config' message is -/// owned by Triton and should not be modified or freed by the caller. -/// -/// The backend configuration, as JSON, is: -/// -/// { -/// "cmdline" : { -/// "" : "", -/// ... -/// } -/// } -/// -/// \param backend The backend. -/// \param backend_config Returns the backend configuration as a message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendConfig( - TRITONBACKEND_Backend* backend, TRITONSERVER_Message** backend_config); - -/// Get the execution policy for this backend. By default the -/// execution policy is TRITONBACKEND_EXECUTION_BLOCKING. -/// -/// \param backend The backend. -/// \param policy Returns the execution policy. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendExecutionPolicy( - TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy* policy); - -/// Set the execution policy for this backend. By default the -/// execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads -/// the backend's execution policy after calling -/// TRITONBACKEND_Initialize, so to be recognized changes to the -/// execution policy must be made in TRITONBACKEND_Initialize. -/// Also, note that if using sequence batcher for the model, Triton will -/// use TRITONBACKEND_EXECUTION_BLOCKING policy irrespective of the -/// policy specified by this setter function. -/// -/// \param backend The backend. -/// \param policy The execution policy. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendSetExecutionPolicy( - TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy policy); - -/// Get the location of the files that make up the backend -/// implementation. This location contains the backend shared library -/// and any other files located with the shared library. The -/// 'location' communicated depends on how the backend is being -/// communicated to Triton as indicated by 'artifact_type'. -/// -/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are -/// made available to Triton via the local filesytem. 
'location' -/// returns the full path to the directory containing this -/// backend's artifacts. The returned string is owned by Triton, -/// not the caller, and so should not be modified or freed. -/// -/// \param backend The backend. -/// \param artifact_type Returns the artifact type for the backend. -/// \param path Returns the location. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendArtifacts( - TRITONBACKEND_Backend* backend, TRITONBACKEND_ArtifactType* artifact_type, - const char** location); - -/// Get the memory manager associated with a backend. -/// -/// \param backend The backend. -/// \param manager Returns the memory manager. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendMemoryManager( - TRITONBACKEND_Backend* backend, TRITONBACKEND_MemoryManager** manager); - -/// Get the user-specified state associated with the backend. The -/// state is completely owned and managed by the backend. -/// -/// \param backend The backend. -/// \param state Returns the user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendState( - TRITONBACKEND_Backend* backend, void** state); - -/// Set the user-specified state associated with the backend. The -/// state is completely owned and managed by the backend. -/// -/// \param backend The backend. -/// \param state The user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_BackendSetState( - TRITONBACKEND_Backend* backend, void* state); - -/// -/// TRITONBACKEND_Model -/// -/// Object representing a model implemented using the backend. -/// - -/// Get the name of the model. The returned string is owned by the -/// model object, not the caller, and so should not be modified or -/// freed. -/// -/// \param model The model. -/// \param name Returns the model name. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelName( - TRITONBACKEND_Model* model, const char** name); - -/// Get the version of the model. -/// -/// \param model The model. -/// \param version Returns the model version. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelVersion( - TRITONBACKEND_Model* model, uint64_t* version); - -/// Get the location of the files that make up the model. The -/// 'location' communicated depends on how the model is being -/// communicated to Triton as indicated by 'artifact_type'. -/// -/// TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made -/// available to Triton via the local filesytem. 'location' -/// returns the full path to the directory in the model repository -/// that contains this model's artifacts. The returned string is -/// owned by Triton, not the caller, and so should not be modified -/// or freed. -/// -/// \param model The model. -/// \param artifact_type Returns the artifact type for the model. -/// \param path Returns the location. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelRepository( - TRITONBACKEND_Model* model, TRITONBACKEND_ArtifactType* artifact_type, - const char** location); - -/// Get the model configuration. The caller takes ownership of the -/// message object and must call TRITONSERVER_MessageDelete to release -/// the object. The configuration is available via this call even -/// before the model is loaded and so can be used in -/// TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig -/// returns equivalent information but is not useable until after the -/// model loads. -/// -/// \param model The model. -/// \param config_version The model configuration will be returned in -/// a format matching this version. If the configuration cannot be -/// represented in the requested version's format then an error will -/// be returned. Currently only version 1 is supported. -/// \param model_config Returns the model configuration as a message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelConfig( - TRITONBACKEND_Model* model, const uint32_t config_version, - TRITONSERVER_Message** model_config); - -/// Whether the backend should attempt to auto-complete the model configuration. -/// If true, the model should fill the inputs, outputs, and max batch size in -/// the model configuration if incomplete. If the model configuration is -/// changed, the new configuration must be reported to Triton using -/// TRITONBACKEND_ModelSetConfig. -/// -/// \param model The model. -/// \param auto_complete_config Returns whether the backend should auto-complete -/// the model configuration. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelAutoCompleteConfig( - TRITONBACKEND_Model* model, bool* auto_complete_config); - -/// Set the model configuration in Triton server. This API should only be called -/// when the backend implements the auto-completion of model configuration -/// and TRITONBACKEND_ModelAutoCompleteConfig returns true in -/// auto_complete_config. Only the inputs, outputs, max batch size, and -/// scheduling choice can be changed. A caveat being scheduling choice can only -/// be changed if none is previously set. Any other changes to the model -/// configuration will be ignored by Triton. This function can only be called -/// from TRITONBACKEND_ModelInitialize, calling in any other context will result -/// in an error being returned. Additionally, Triton server can add some of the -/// missing fields in the provided config with this call. The backend must get -/// the complete configuration again by using TRITONBACKEND_ModelConfig. -/// TRITONBACKEND_ModelSetConfig does not take ownership of the message object -/// and so the caller should call TRITONSERVER_MessageDelete to release the -/// object once the function returns. -/// -/// \param model The model. -/// \param config_version The format version of the model configuration. -/// If the configuration is not represented in the version's format -/// then an error will be returned. Currently only version 1 is supported. -/// \param model_config The updated model configuration as a message. -/// \return a TRITONSERVER_Error indicating success or failure. 
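// --- Editorial sketch (not part of the original header): how model
// initialization code might consult the auto-complete flag and fetch the
// configuration. Only config format version 1 is currently supported, per the
// documentation above. 'InspectModelConfig' is a hypothetical helper, not
// part of the API.
#include <stdbool.h>
#include "triton/core/tritonbackend.h"

static TRITONSERVER_Error*
InspectModelConfig(TRITONBACKEND_Model* model)
{
  bool auto_complete = false;
  TRITONSERVER_Error* err =
      TRITONBACKEND_ModelAutoCompleteConfig(model, &auto_complete);
  if (err != NULL) {
    return err;
  }

  TRITONSERVER_Message* config = NULL;
  err = TRITONBACKEND_ModelConfig(model, 1 /* config_version */, &config);
  if (err != NULL) {
    return err;
  }

  // ... examine 'config'; if 'auto_complete' is true, report the completed
  // configuration back with TRITONBACKEND_ModelSetConfig ...

  // The caller owns the returned message and must delete it.
  TRITONSERVER_MessageDelete(config);
  return NULL;  // success
}
// --- End of editorial sketch.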
-TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelSetConfig( - TRITONBACKEND_Model* model, const uint32_t config_version, - TRITONSERVER_Message* model_config); - -/// Get the TRITONSERVER_Server object that this model is being served -/// by. -/// -/// \param model The model. -/// \param server Returns the server. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelServer( - TRITONBACKEND_Model* model, TRITONSERVER_Server** server); - -/// Get the backend used by the model. -/// -/// \param model The model. -/// \param model Returns the backend object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelBackend( - TRITONBACKEND_Model* model, TRITONBACKEND_Backend** backend); - -/// Get the user-specified state associated with the model. The -/// state is completely owned and managed by the backend. -/// -/// \param model The model. -/// \param state Returns the user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelState( - TRITONBACKEND_Model* model, void** state); - -/// Set the user-specified state associated with the model. The -/// state is completely owned and managed by the backend. -/// -/// \param model The model. -/// \param state The user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelSetState( - TRITONBACKEND_Model* model, void* state); - -/// -/// TRITONBACKEND_ModelInstance -/// -/// Object representing a model instance implemented using the -/// backend. -/// - -/// Get the name of the model instance. The returned string is owned by the -/// model object, not the caller, and so should not be modified or -/// freed. -/// -/// \param instance The model instance. -/// \param name Returns the instance name. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceName( - TRITONBACKEND_ModelInstance* instance, const char** name); - -/// Get the kind of the model instance. -/// -/// \param instance The model instance. -/// \param kind Returns the instance kind. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceKind( - TRITONBACKEND_ModelInstance* instance, - TRITONSERVER_InstanceGroupKind* kind); - -/// Get the device ID of the model instance. -/// -/// \param instance The model instance. -/// \param device_id Returns the instance device ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceDeviceId( - TRITONBACKEND_ModelInstance* instance, int32_t* device_id); - -/// Get the host policy setting. The 'host_policy' message is -/// owned by Triton and should not be modified or freed by the caller. -/// -/// The host policy setting, as JSON, is: -/// -/// { -/// "" : { -/// "" : "", -/// ... -/// } -/// } -/// -/// \param instance The model instance. -/// \param host_policy Returns the host policy setting as a message. -/// \return a TRITONSERVER_Error indicating success or failure. 
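// --- Editorial sketch (not part of the original header): a hypothetical
// per-model user-state pattern using the setter/getter documented above. The
// backend allocates the state, hands the pointer to Triton with
// TRITONBACKEND_ModelSetState, and frees it again (e.g. in
// TRITONBACKEND_ModelFinalize). 'ExampleModelState' is an invented struct.
#include <stdlib.h>
#include "triton/core/tritonbackend.h"

typedef struct {
  int warmup_done;  // whatever the backend needs to remember per model
} ExampleModelState;

static TRITONSERVER_Error*
AttachModelState(TRITONBACKEND_Model* model)
{
  ExampleModelState* s = (ExampleModelState*)calloc(1, sizeof(ExampleModelState));
  return TRITONBACKEND_ModelSetState(model, s);  // state is owned by the backend
}

static TRITONSERVER_Error*
DetachModelState(TRITONBACKEND_Model* model)
{
  void* s = NULL;
  TRITONSERVER_Error* err = TRITONBACKEND_ModelState(model, &s);
  if (err == NULL) {
    free(s);
  }
  return err;
}
// --- End of editorial sketch.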
-TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceHostPolicy( - TRITONBACKEND_ModelInstance* instance, TRITONSERVER_Message** host_policy); - -/// Whether the model instance is passive. -/// -/// \param instance The model instance. -/// \param is_passive Returns true if the instance is passive, false otherwise -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceIsPassive( - TRITONBACKEND_ModelInstance* instance, bool* is_passive); - -/// Get the number of optimization profiles to be loaded for the instance. -/// -/// \param instance The model instance. -/// \param count Returns the number of optimization profiles. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceProfileCount( - TRITONBACKEND_ModelInstance* instance, uint32_t* count); - -/// Get the name of optimization profile. The caller does not own -/// the returned string and must not modify or delete it. The lifetime -/// of the returned string extends only as long as 'instance'. -/// -/// \param instance The model instance. -/// \param index The index of the optimization profile. Must be 0 -/// <= index < count, where count is the value returned by -/// TRITONBACKEND_ModelInstanceProfileCount. -/// \param profile_name Returns the name of the optimization profile -/// corresponding to the index. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceProfileName( - TRITONBACKEND_ModelInstance* instance, const uint32_t index, - const char** profile_name); - -/// Get the number of secondary devices configured for the instance. -/// -/// \param instance The model instance. -/// \param count Returns the number of secondary devices. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceSecondaryDeviceCount( - TRITONBACKEND_ModelInstance* instance, uint32_t* count); - -/// Get the properties of indexed secondary device. The returned -/// strings and other properties are owned by the instance, not the -/// caller, and so should not be modified or freed. -/// -/// \param instance The model instance. -/// \param index The index of the secondary device. Must be 0 -/// <= index < count, where count is the value returned by -/// TRITONBACKEND_ModelInstanceSecondaryDeviceCount. -/// \param kind Returns the kind of secondary device corresponding -/// to the index. -/// \param id Returns the id of secondary device corresponding to the index. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceSecondaryDeviceProperties( - TRITONBACKEND_ModelInstance* instance, uint32_t index, const char** kind, - int64_t* id); - -/// Get the model associated with a model instance. -/// -/// \param instance The model instance. -/// \param backend Returns the model object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceModel( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Model** model); - -/// Get the user-specified state associated with the model -/// instance. The state is completely owned and managed by the -/// backend. -/// -/// \param instance The model instance. -/// \param state Returns the user state, or nullptr if no user state. 
-/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceState( - TRITONBACKEND_ModelInstance* instance, void** state); - -/// Set the user-specified state associated with the model -/// instance. The state is completely owned and managed by the -/// backend. -/// -/// \param instance The model instance. -/// \param state The user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceSetState( - TRITONBACKEND_ModelInstance* instance, void* state); - -/// Record statistics for an inference request. -/// -/// Set 'success' true to indicate that the inference request -/// completed successfully. In this case all timestamps should be -/// non-zero values reported in nanoseconds and should be collected -/// using std::chrono::steady_clock::now().time_since_epoch() or the equivalent. -/// Set 'success' to false to indicate that the inference request failed -/// to complete successfully. In this case all timestamps values are -/// ignored. -/// -/// For consistency of measurement across different backends, the -/// timestamps should be collected at the following points during -/// TRITONBACKEND_ModelInstanceExecute. -/// -/// TRITONBACKEND_ModelInstanceExecute() -/// CAPTURE TIMESPACE (exec_start_ns) -/// < process input tensors to prepare them for inference -/// execution, including copying the tensors to/from GPU if -/// necessary> -/// CAPTURE TIMESPACE (compute_start_ns) -/// < perform inference computations to produce outputs > -/// CAPTURE TIMESPACE (compute_end_ns) -/// < allocate output buffers and extract output tensors, including -/// copying the tensors to/from GPU if necessary> -/// CAPTURE TIMESPACE (exec_end_ns) -/// return -/// -/// Note that these statistics are associated with a valid -/// TRITONBACKEND_Request object and so must be reported before the -/// request is released. For backends that release the request before -/// all response(s) are sent, these statistics cannot capture -/// information about the time required to produce the response. -/// -/// \param instance The model instance. -/// \param request The inference request that statistics are being -/// reported for. -/// \param success True if the inference request completed -/// successfully, false if it failed to complete. -/// \param exec_start_ns Timestamp for the start of execution. -/// \param compute_start_ns Timestamp for the start of execution -/// computations. -/// \param compute_end_ns Timestamp for the end of execution -/// computations. -/// \param exec_end_ns Timestamp for the end of execution. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceReportStatistics( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request* request, - const bool success, const uint64_t exec_start_ns, - const uint64_t compute_start_ns, const uint64_t compute_end_ns, - const uint64_t exec_end_ns); - -/// Record statistics for the execution of an entire batch of -/// inference requests. -/// -/// All timestamps should be non-zero values reported in nanoseconds -/// and should be collected using -/// std::chrono::steady_clock::now().time_since_epoch() or the equivalent. -/// See TRITONBACKEND_ModelInstanceReportStatistics for more information about -/// the timestamps. 
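// --- Editorial sketch (not part of the original header): the timestamp-capture
// pattern described above, using a monotonic clock in nanoseconds
// (clock_gettime(CLOCK_MONOTONIC) as a C stand-in for
// std::chrono::steady_clock; POSIX is assumed). The commented call at the end
// mirrors the TRITONBACKEND_ModelInstanceReportStatistics signature above.
#include <stdint.h>
#include <time.h>

static uint64_t ExampleNowNs(void)
{
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

// Inside TRITONBACKEND_ModelInstanceExecute, per request, one might do:
//
//   uint64_t exec_start_ns = ExampleNowNs();
//   /* prepare input tensors */
//   uint64_t compute_start_ns = ExampleNowNs();
//   /* run inference */
//   uint64_t compute_end_ns = ExampleNowNs();
//   /* extract outputs and send responses */
//   uint64_t exec_end_ns = ExampleNowNs();
//   TRITONBACKEND_ModelInstanceReportStatistics(
//       instance, request, true /* success */, exec_start_ns,
//       compute_start_ns, compute_end_ns, exec_end_ns);
// --- End of editorial sketch.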
-/// -/// 'batch_size' is the sum of the batch sizes for the individual -/// requests that were delivered together in the call to -/// TRITONBACKEND_ModelInstanceExecute. For example, if three requests -/// are passed to TRITONBACKEND_ModelInstanceExecute and those -/// requests have batch size 1, 2, and 3; then 'batch_size' should be -/// set to 6. -/// -/// \param instance The model instance. -/// \param batch_size Combined batch size of all the individual -/// requests executed in the batch. -/// \param exec_start_ns Timestamp for the start of execution. -/// \param compute_start_ns Timestamp for the start of execution -/// computations. -/// \param compute_end_ns Timestamp for the end of execution -/// computations. -/// \param exec_end_ns Timestamp for the end of execution. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceReportBatchStatistics( - TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size, - const uint64_t exec_start_ns, const uint64_t compute_start_ns, - const uint64_t compute_end_ns, const uint64_t exec_end_ns); - -/// -/// The following functions can be implemented by a backend. Functions -/// indicated as required must be implemented or the backend will fail -/// to load. -/// - -/// Initialize a backend. This function is optional, a backend is not -/// required to implement it. This function is called once when a -/// backend is loaded to allow the backend to initialize any state -/// associated with the backend. A backend has a single state that is -/// shared across all models that use the backend. -/// -/// \param backend The backend. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Initialize( - TRITONBACKEND_Backend* backend); - -/// Finalize for a backend. This function is optional, a backend is -/// not required to implement it. This function is called once, just -/// before the backend is unloaded. All state associated with the -/// backend should be freed and any threads created for the backend -/// should be exited/joined before returning from this function. -/// -/// \param backend The backend. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Finalize( - TRITONBACKEND_Backend* backend); - -/// Initialize for a model. This function is optional, a backend is -/// not required to implement it. This function is called once when a -/// model that uses the backend is loaded to allow the backend to -/// initialize any state associated with the model. The backend should -/// also examine the model configuration to determine if the -/// configuration is suitable for the backend. Any errors reported by -/// this function will prevent the model from loading. -/// -/// \param model The model. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInitialize( - TRITONBACKEND_Model* model); - -/// Finalize for a model. This function is optional, a backend is not -/// required to implement it. This function is called once for a -/// model, just before the model is unloaded from Triton. All state -/// associated with the model should be freed and any threads created -/// for the model should be exited/joined before returning from this -/// function. -/// -/// \param model The model. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelFinalize( - TRITONBACKEND_Model* model); - -/// Initialize for a model instance. This function is optional, a -/// backend is not required to implement it. This function is called -/// once when a model instance is created to allow the backend to -/// initialize any state associated with the instance. -/// -/// \param instance The model instance. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceInitialize( - TRITONBACKEND_ModelInstance* instance); - -/// Finalize for a model instance. This function is optional, a -/// backend is not required to implement it. This function is called -/// once for an instance, just before the corresponding model is -/// unloaded from Triton. All state associated with the instance -/// should be freed and any threads created for the instance should be -/// exited/joined before returning from this function. -/// -/// \param instance The model instance. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize( - TRITONBACKEND_ModelInstance* instance); - -/// Execute a batch of one or more requests on a model instance. This -/// function is required. Triton will not perform multiple -/// simultaneous calls to this function for a given model 'instance'; -/// however, there may be simultaneous calls for different model -/// instances (for the same or different models). -/// -/// If an error is returned the ownership of the request objects -/// remains with Triton and the backend must not retain references to -/// the request objects or access them in any way. -/// -/// If success is returned, ownership of the request objects is -/// transferred to the backend and it is then responsible for creating -/// responses and releasing the request objects. Note that even though -/// ownership of the request objects is transferred to the backend, the -/// ownership of the buffer holding request pointers is returned back -/// to Triton upon return from TRITONBACKEND_ModelInstanceExecute. If -/// any request objects need to be maintained beyond -/// TRITONBACKEND_ModelInstanceExecute, then the pointers must be copied -/// out of the array within TRITONBACKEND_ModelInstanceExecute. -/// -/// \param instance The model instance. -/// \param requests The requests. -/// \param request_count The number of requests in the batch. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, - const uint32_t request_count); - -/// Query the backend for different model attributes. This function is optional, -/// a backend is not required to implement it. The backend is also not required -/// to set all backend attribute listed. This function is called when -/// Triton requires further backend / model information to perform operations. -/// This function may be called multiple times within the lifetime of the -/// backend (between TRITONBACKEND_Initialize and TRITONBACKEND_Finalize). -/// The backend may return error to indicate failure to set the backend -/// attributes, and the attributes specified in the same function call will be -/// ignored. Triton will update the specified attributes if 'nullptr' is -/// returned. -/// -/// \param backend The backend. 
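// --- Editorial sketch (not part of the original header): a skeleton of the
// required execute entry point documented above, honoring the ownership rules:
// on success the backend owns each request and must release it, while the
// 'requests' array itself still belongs to Triton. TRITONBACKEND_RequestRelease
// and TRITONSERVER_REQUEST_RELEASE_ALL are declared earlier in this header and
// in tritonserver.h respectively.
#include "triton/core/tritonbackend.h"

TRITONBACKEND_ISPEC TRITONSERVER_Error*
TRITONBACKEND_ModelInstanceExecute(
    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests,
    const uint32_t request_count)
{
  for (uint32_t r = 0; r < request_count; ++r) {
    TRITONBACKEND_Request* request = requests[r];
    // ... create, fill and send a response for 'request' ...
    // (release return value ignored here for brevity)
    TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL);
  }
  return NULL;  // success: all requests have been released
}
// --- End of editorial sketch.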
-/// \param backend_attributes Return the backend attribute. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_GetBackendAttribute( - TRITONBACKEND_Backend* backend, - TRITONBACKEND_BackendAttribute* backend_attributes); - -/// TRITONBACKEND_BackendAttribute -/// -/// API to modify attributes associated with a backend. -/// - -/// Add the preferred instance group of the backend. This function -/// can be called multiple times to cover different instance group kinds that -/// the backend supports, given the priority order that the first call describes -/// the most preferred group. In the case where instance group are not -/// explicitly provided, Triton will use this attribute to create model -/// deployment that aligns more with the backend preference. -/// -/// \param backend_attributes The backend attributes object. -/// \param kind The kind of the instance group. -/// \param count The number of instances per device. Triton default will be used -/// if 0 is provided. -/// \param device_ids The devices where instances should be available. Triton -/// default will be used if 'nullptr' is provided. -/// \param id_count The number of devices in 'device_ids'. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup( - TRITONBACKEND_BackendAttribute* backend_attributes, - const TRITONSERVER_InstanceGroupKind kind, const uint64_t count, - const uint64_t* device_ids, const uint64_t id_count); - -#ifdef __cplusplus -} -#endif diff --git a/3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h b/3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h deleted file mode 100644 index 078ec6219c8ac05dfec5baa44312003a82bbcdd6..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/include/triton/core/tritonrepoagent.h +++ /dev/null @@ -1,417 +0,0 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include -#include -#include "triton/core/tritonserver.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _COMPILING_TRITONREPOAGENT -#if defined(_MSC_VER) -#define TRITONREPOAGENT_DECLSPEC __declspec(dllexport) -#define TRITONREPOAGENT_ISPEC __declspec(dllimport) -#elif defined(__GNUC__) -#define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default"))) -#define TRITONREPOAGENT_ISPEC -#else -#define TRITONREPOAGENT_DECLSPEC -#define TRITONREPOAGENT_ISPEC -#endif -#else -#if defined(_MSC_VER) -#define TRITONREPOAGENT_DECLSPEC __declspec(dllimport) -#define TRITONREPOAGENT_ISPEC __declspec(dllexport) -#else -#define TRITONREPOAGENT_DECLSPEC -#define TRITONREPOAGENT_ISPEC -#endif -#endif - -struct TRITONREPOAGENT_Agent; -struct TRITONREPOAGENT_AgentModel; - -/// -/// TRITONREPOAGENT API Version -/// -/// The TRITONREPOAGENT API is versioned with major and minor version -/// numbers. Any change to the API that does not impact backwards -/// compatibility (for example, adding a non-required function) -/// increases the minor version number. Any change that breaks -/// backwards compatibility (for example, deleting or changing the -/// behavior of a function) increases the major version number. A -/// repository agent should check that the API version used to compile -/// the agent is compatible with the API version of the Triton server -/// that it is running in. This is typically done by code similar to -/// the following which makes sure that the major versions are equal -/// and that the minor version of Triton is >= the minor version used -/// to build the agent. -/// -/// uint32_t api_version_major, api_version_minor; -/// TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor); -/// if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) || -/// (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) { -/// return TRITONSERVER_ErrorNew( -/// TRITONSERVER_ERROR_UNSUPPORTED, -/// "triton repository agent API version does not support this agent"); -/// } -/// -#define TRITONREPOAGENT_API_VERSION_MAJOR 0 -#define TRITONREPOAGENT_API_VERSION_MINOR 1 - -/// Get the TRITONREPOAGENT API version supported by Triton. This -/// value can be compared against the -/// TRITONREPOAGENT_API_VERSION_MAJOR and -/// TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to -/// ensure that Triton is compatible with the agent. -/// -/// \param major Returns the TRITONREPOAGENT API major version supported -/// by Triton. -/// \param minor Returns the TRITONREPOAGENT API minor version supported -/// by Triton. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ApiVersion( - uint32_t* major, uint32_t* minor); - -/// TRITONREPOAGENT_ArtifactType -/// -/// The ways that the files that make up a model's repository content -/// are communicated between Triton and the agent. -/// -/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are -/// communicated to and from the repository agent via a locally -/// accessible filesystem. The agent can access these files using -/// an appropriate filesystem API. -/// -/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are -/// communicated to and from the repository agent via a remote filesystem. -/// The remote filesystem path follows the same convention as is used for -/// repository paths, for example, "s3://" prefix indicates an S3 path. 
-/// -typedef enum TRITONREPOAGENT_artifacttype_enum { - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM -} TRITONREPOAGENT_ArtifactType; - -/// TRITONREPOAGENT_ActionType -/// -/// Types of repository actions that can be handled by an agent. -/// The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to -/// TRITONREPOAGENT_ModelInitialize and ends with a call to -/// TRITONREPOAGENT_ModelFinalize. Between those calls the current lifecycle -/// state of the model is communicated by calls to TRITONREPOAGENT_ModelAction. -/// Possible lifecycles are: -/// -/// LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE -/// LOAD -> LOAD_FAIL -/// -/// TRITONREPOAGENT_ACTION_LOAD: A model is being loaded. -/// -/// TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed -/// successfully and the model is now loaded. -/// -/// TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete -/// successfully. The model is not loaded. -/// -/// TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded. -/// -/// TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete. -/// -typedef enum TRITONREPOAGENT_actiontype_enum { - TRITONREPOAGENT_ACTION_LOAD, - TRITONREPOAGENT_ACTION_LOAD_COMPLETE, - TRITONREPOAGENT_ACTION_LOAD_FAIL, - TRITONREPOAGENT_ACTION_UNLOAD, - TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE -} TRITONREPOAGENT_ActionType; - -/// Get the location of the files that make up the model. The -/// 'location' communicated depends on how the model is being -/// communicated to the agent as indicated by 'artifact_type'. -/// -/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are -/// made available to the agent via the local -/// filesytem. 'location' returns the full path to the directory -/// in the model repository that contains the model's -/// artifacts. The returned location string is owned by Triton, -/// not the caller, and so should not be modified or freed. The -/// contents of the directory are owned by Triton, not the agent, -/// and so the agent should not delete or modify the contents. Use -/// TRITONREPOAGENT_RepositoryAcquire to get a location that can be -/// used to modify the model repository contents. -/// -/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are -/// made available to the agent via a remote filesystem. -/// 'location' returns the full path to the remote directory that contains -/// the model's artifacts. The returned location string is owned by Triton, -/// not the caller, and so should not be modified or freed. The contents of -/// the remote directory are owned by Triton, not the agent, -/// and so the agent should not delete or modify the contents. -/// Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location -/// that can be used to write updated model repository contents. -/// -/// \param agent The agent. -/// \param model The model. -/// \param artifact_type Returns the artifact type for the location. -/// \param path Returns the location. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryLocation( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - TRITONREPOAGENT_ArtifactType* artifact_type, const char** location); - -/// Acquire a location where the agent can produce a new version of -/// the model repository files. This is a convenience method to create -/// a temporary directory for the agent. 
The agent is responsible for -/// calling TRITONREPOAGENT_ModelRepositoryLocationDelete in -/// TRITONREPOAGENT_ModelFinalize to delete the location. Initially the -/// acquired location is empty. The 'location' communicated depends on -/// the requested 'artifact_type'. -/// -/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory -/// on the local filesystem. 'location' returns the full path to -/// an empty directory that the agent should populate with the -/// model's artifacts. The returned location string is owned by -/// Triton, not the agent, and so should not be modified or freed. -/// -/// \param agent The agent. -/// \param model The model. -/// \param artifact_type The artifact type for the location. -/// \param path Returns the location. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ArtifactType artifact_type, const char** location); - -/// Discard and release ownership of a previously acquired location -/// and its contents. The agent must not access or modify the location -/// or its contents after this call. -/// -/// \param agent The agent. -/// \param model The model. -/// \param path The location to release. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryLocationRelease( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const char* location); - -/// Inform Triton that the specified repository location should be used for -/// the model in place of the original model repository. This method can only be -/// called when TRITONREPOAGENT_ModelAction is invoked with -/// TRITONREPOAGENT_ACTION_LOAD. The 'location' The 'location' -/// communicated depends on how the repository is being -/// communicated to Triton as indicated by 'artifact_type'. -/// -/// TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are -/// made available to Triton via the local filesytem. 'location' returns -/// the full path to the directory. Ownership of the contents of the -/// returned directory are transferred to Triton and the agent should not -/// modified or freed the contents until TRITONREPOAGENT_ModelFinalize. -/// The local filesystem directory can be created using -/// TRITONREPOAGENT_ModelReopsitroyLocationAcquire or the agent can use -/// its own local filesystem API. -/// -/// TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are -/// made available to Triton via a remote filesystem. 'location' returns -/// the full path to the remote filesystem directory. Ownership of the -/// contents of the returned directory are transferred to Triton and -/// the agent should not modified or freed the contents until -/// TRITONREPOAGENT_ModelFinalize. -/// -/// \param agent The agent. -/// \param model The model. -/// \param artifact_type The artifact type for the location. -/// \param path Returns the location. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryUpdate( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ArtifactType artifact_type, const char* location); - -/// Get the number of agent parameters defined for a model. -/// -/// \param agent The agent. -/// \param model The model. 
-/// \param count Returns the number of input tensors. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelParameterCount( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - uint32_t* count); - -/// Get a parameter name and value. The caller does not own the -/// returned strings and must not modify or delete them. -/// -/// \param agent The agent. -/// \param model The model. -/// \param index The index of the parameter. Must be 0 <= index < -/// count, where count is the value returned by -/// TRITONREPOAGENT_ModelParameterCount. -/// \param parameter_name Returns the name of the parameter. -/// \param parameter_value Returns the value of the parameter. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelParameter( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const uint32_t index, const char** parameter_name, - const char** parameter_value); - -/// Get the model configuration. The caller takes ownership of the -/// message object and must call TRITONSERVER_MessageDelete to release -/// the object. If the model repository does not contain a -/// config.pbtxt file then 'model_config' is returned as nullptr. -/// -/// \param agent The agent. -/// \param model The model. -/// \param config_version The model configuration will be returned in -/// a format matching this version. If the configuration cannot be -/// represented in the requested version's format then an error will -/// be returned. Currently only version 1 is supported. -/// \param model_config Returns the model configuration as a message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelConfig( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const uint32_t config_version, TRITONSERVER_Message** model_config); - -/// Get the user-specified state associated with the model. -/// -/// \param model The agent model. -/// \param state Returns the user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelState( - TRITONREPOAGENT_AgentModel* model, void** state); - -/// Set the user-specified state associated with the model. -/// -/// \param model The agent model. -/// \param state The user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelSetState( - TRITONREPOAGENT_AgentModel* model, void* state); - -/// Get the user-specified state associated with the agent. -/// -/// \param agent The agent. -/// \param state Returns the user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_State( - TRITONREPOAGENT_Agent* agent, void** state); - -/// Set the user-specified state associated with the agent. -/// -/// \param agent The agent. -/// \param state The user state, or nullptr if no user state. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_DECLSPEC TRITONSERVER_Error* TRITONREPOAGENT_SetState( - TRITONREPOAGENT_Agent* agent, void* state); - -/// -/// The following functions can be implemented by an agent. 
Functions -/// indicated as required must be implemented or the agent will fail -/// to load. -/// - -/// Initialize an agent. This function is optional. This function is -/// called once when an agent is loaded to allow the agent to -/// initialize any state associated with the agent. An agent has a -/// single state that is shared across all invocations of the agent. -/// -/// \param agent The agent. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_Initialize( - TRITONREPOAGENT_Agent* agent); - -/// Finalize for an agent. This function is optional. This function is -/// called once, just before the agent is unloaded. All state -/// associated with the agent should be freed and any threads created -/// for the agent should be exited/joined before returning from this -/// function. -/// -/// \param agent The agent. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_Finalize( - TRITONREPOAGENT_Agent* agent); - -/// Initialize a model associated with an agent. This function is optional. -/// This function is called once when an agent model's lifecycle begins to allow -/// the agent model to initialize any state associated with it. An agent model -/// has a single state that is shared across all the lifecycle of the agent -/// model. -/// -/// \param agent The agent to be associated with the model. -/// \param model The model. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelInitialize( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model); - -/// Finalize for a model. This function is optional. This function is -/// called once, just before the end of the agent model's lifecycle. All state -/// associated with the agent model should be freed and any threads created -/// for the agent model should be exited/joined before returning from this -/// function. If the model acquired a model location using -/// TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call -/// TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location. -/// -/// \param agent The agent associated with the model. -/// \param model The model. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelFinalize( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model); - -/// Handle an action for a specified model. This function is -/// required. Triton will not perform multiple simultaneous calls to -/// this function for a given agent and model; however, there may be -/// simultaneous calls for the agent for different models. -/// -/// If the agent does not handle the action the agent should -/// immediately return success (nullptr). -/// -/// Any modification to the model's repository must be made when 'action_type' -/// is TRITONREPOAGENT_ACTION_LOAD. -/// To modify the model's repository the agent must either acquire a mutable -/// location via TRITONREPOAGENT_ModelRepositoryLocationAcquire -/// or its own managed location, report the location to Triton via -/// TRITONREPOAGENT_ModelRepositoryUpdate, and then return -/// success (nullptr). If the agent does not need to make any changes -/// to the model repository it should not call -/// TRITONREPOAGENT_ModelRepositoryUpdate and then return success. -/// To indicate that a model load should fail return a non-success status. 
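// --- Editorial sketch (not part of the original header): a repository agent
// action handler that only reacts to the LOAD action and passes every other
// lifecycle event through by returning success, as recommended above for
// actions the agent does not handle.
#include "triton/core/tritonrepoagent.h"

TRITONREPOAGENT_ISPEC TRITONSERVER_Error*
TRITONREPOAGENT_ModelAction(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ActionType action_type)
{
  if (action_type != TRITONREPOAGENT_ACTION_LOAD) {
    return NULL;  // not handled; report success
  }
  // ... inspect or rewrite the repository here, e.g. acquire a writable
  // location with TRITONREPOAGENT_ModelRepositoryLocationAcquire and report
  // it with TRITONREPOAGENT_ModelRepositoryUpdate, as described above ...
  return NULL;  // success
}
// --- End of editorial sketch.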
-/// -/// \param agent The agent. -/// \param model The model that is the target of the action. -/// \action_type The type of action the agent should handle for the model. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONREPOAGENT_ISPEC TRITONSERVER_Error* TRITONREPOAGENT_ModelAction( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type); - -#ifdef __cplusplus -} -#endif diff --git a/3rdparty/core-r22.12/include/triton/core/tritonserver.h b/3rdparty/core-r22.12/include/triton/core/tritonserver.h deleted file mode 100644 index 6edd5f1809116166215e4b1702b12dfba7f19de4..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/include/triton/core/tritonserver.h +++ /dev/null @@ -1,2360 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -/// \file - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _COMPILING_TRITONSERVER -#if defined(_MSC_VER) -#define TRITONSERVER_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONSERVER_DECLSPEC -#endif -#else -#if defined(_MSC_VER) -#define TRITONSERVER_DECLSPEC __declspec(dllimport) -#else -#define TRITONSERVER_DECLSPEC -#endif -#endif - -struct TRITONSERVER_BufferAttributes; -struct TRITONSERVER_Error; -struct TRITONSERVER_InferenceRequest; -struct TRITONSERVER_InferenceResponse; -struct TRITONSERVER_InferenceTrace; -struct TRITONSERVER_Message; -struct TRITONSERVER_Metrics; -struct TRITONSERVER_Parameter; -struct TRITONSERVER_ResponseAllocator; -struct TRITONSERVER_Server; -struct TRITONSERVER_ServerOptions; -struct TRITONSERVER_Metric; -struct TRITONSERVER_MetricFamily; - -/// -/// TRITONSERVER API Version -/// -/// The TRITONSERVER API is versioned with major and minor version -/// numbers. 
Any change to the API that does not impact backwards -/// compatibility (for example, adding a non-required function) -/// increases the minor version number. Any change that breaks -/// backwards compatibility (for example, deleting or changing the -/// behavior of a function) increases the major version number. A -/// client should check that the API version used to compile the -/// client is compatible with the API version of the Triton shared -/// library that it is linking against. This is typically done by code -/// similar to the following which makes sure that the major versions -/// are equal and that the minor version of the Triton shared library -/// is >= the minor version used to build the client. -/// -/// uint32_t api_version_major, api_version_minor; -/// TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor); -/// if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) || -/// (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) { -/// return TRITONSERVER_ErrorNew( -/// TRITONSERVER_ERROR_UNSUPPORTED, -/// "triton server API version does not support this client"); -/// } -/// -#define TRITONSERVER_API_VERSION_MAJOR 1 -#define TRITONSERVER_API_VERSION_MINOR 17 - -/// Get the TRITONBACKEND API version supported by the Triton shared -/// library. This value can be compared against the -/// TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR -/// used to build the client to ensure that Triton shared library is -/// compatible with the client. -/// -/// \param major Returns the TRITONSERVER API major version supported -/// by Triton. -/// \param minor Returns the TRITONSERVER API minor version supported -/// by Triton. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ApiVersion( - uint32_t* major, uint32_t* minor); - -/// TRITONSERVER_DataType -/// -/// Tensor data types recognized by TRITONSERVER. -/// -typedef enum TRITONSERVER_datatype_enum { - TRITONSERVER_TYPE_INVALID, - TRITONSERVER_TYPE_BOOL, - TRITONSERVER_TYPE_UINT8, - TRITONSERVER_TYPE_UINT16, - TRITONSERVER_TYPE_UINT32, - TRITONSERVER_TYPE_UINT64, - TRITONSERVER_TYPE_INT8, - TRITONSERVER_TYPE_INT16, - TRITONSERVER_TYPE_INT32, - TRITONSERVER_TYPE_INT64, - TRITONSERVER_TYPE_FP16, - TRITONSERVER_TYPE_FP32, - TRITONSERVER_TYPE_FP64, - TRITONSERVER_TYPE_BYTES, - TRITONSERVER_TYPE_BF16 -} TRITONSERVER_DataType; - -/// Get the string representation of a data type. The returned string -/// is not owned by the caller and so should not be modified or freed. -/// -/// \param datatype The data type. -/// \return The string representation of the data type. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_DataTypeString( - TRITONSERVER_DataType datatype); - -/// Get the Triton datatype corresponding to a string representation -/// of a datatype. -/// -/// \param dtype The datatype string representation. -/// \return The Triton data type or TRITONSERVER_TYPE_INVALID if the -/// string does not represent a data type. -TRITONSERVER_DECLSPEC TRITONSERVER_DataType -TRITONSERVER_StringToDataType(const char* dtype); - -/// Get the size of a Triton datatype in bytes. Zero is returned for -/// TRITONSERVER_TYPE_BYTES because it have variable size. Zero is -/// returned for TRITONSERVER_TYPE_INVALID. -/// -/// \param dtype The datatype. -/// \return The size of the datatype. 
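// --- Editorial sketch (not part of the original header): a small illustration
// of the datatype helpers documented above, mapping a string such as "FP32" to
// a TRITONSERVER_DataType and querying its size. TRITONSERVER_TYPE_BYTES
// reports size 0 because it is variable-length.
#include <stdio.h>
#include "triton/core/tritonserver.h"

static void ExamplePrintDataType(const char* name)
{
  TRITONSERVER_DataType dt = TRITONSERVER_StringToDataType(name);
  if (dt == TRITONSERVER_TYPE_INVALID) {
    printf("%s: not a Triton datatype\n", name);
    return;
  }
  printf("%s -> %s, %u byte(s)\n", name, TRITONSERVER_DataTypeString(dt),
         (unsigned)TRITONSERVER_DataTypeByteSize(dt));
}
// --- End of editorial sketch.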
-TRITONSERVER_DECLSPEC uint32_t -TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype); - -/// TRITONSERVER_MemoryType -/// -/// Types of memory recognized by TRITONSERVER. -/// -typedef enum TRITONSERVER_memorytype_enum { - TRITONSERVER_MEMORY_CPU, - TRITONSERVER_MEMORY_CPU_PINNED, - TRITONSERVER_MEMORY_GPU -} TRITONSERVER_MemoryType; - -/// Get the string representation of a memory type. The returned -/// string is not owned by the caller and so should not be modified or -/// freed. -/// -/// \param memtype The memory type. -/// \return The string representation of the memory type. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_MemoryTypeString( - TRITONSERVER_MemoryType memtype); - -/// TRITONSERVER_ParameterType -/// -/// Types of parameters recognized by TRITONSERVER. -/// -typedef enum TRITONSERVER_parametertype_enum { - TRITONSERVER_PARAMETER_STRING, - TRITONSERVER_PARAMETER_INT, - TRITONSERVER_PARAMETER_BOOL, - TRITONSERVER_PARAMETER_BYTES -} TRITONSERVER_ParameterType; - -/// Get the string representation of a parameter type. The returned -/// string is not owned by the caller and so should not be modified or -/// freed. -/// -/// \param paramtype The parameter type. -/// \return The string representation of the parameter type. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_ParameterTypeString( - TRITONSERVER_ParameterType paramtype); - -/// Create a new parameter object. The caller takes ownership of the -/// TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to -/// release the object. The object will maintain its own copy of the 'value' -/// -/// \param name The parameter name. -/// \param type The parameter type. -/// \param value The pointer to the value. -/// \return A new TRITONSERVER_Parameter object. 'nullptr' will be returned if -/// 'type' is 'TRITONSERVER_PARAMETER_BYTES'. The caller should use -/// TRITONSERVER_ParameterBytesNew to create parameter with bytes type. -TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterNew( - const char* name, const TRITONSERVER_ParameterType type, const void* value); - -/// Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES. -/// The caller takes ownership of the TRITONSERVER_Parameter object and must -/// call TRITONSERVER_ParameterDelete to release the object. The object only -/// maintains a shallow copy of the 'byte_ptr' so the data content must be -/// valid until the parameter object is deleted. -/// -/// \param name The parameter name. -/// \param byte_ptr The pointer to the data content. -/// \param size The size of the data content. -/// \return A new TRITONSERVER_Error object. -TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterBytesNew( - const char* name, const void* byte_ptr, const uint64_t size); - -/// Delete an parameter object. -/// -/// \param parameter The parameter object. -TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete( - TRITONSERVER_Parameter* parameter); - -/// TRITONSERVER_InstanceGroupKind -/// -/// Kinds of instance groups recognized by TRITONSERVER. -/// -typedef enum TRITONSERVER_instancegroupkind_enum { - TRITONSERVER_INSTANCEGROUPKIND_AUTO, - TRITONSERVER_INSTANCEGROUPKIND_CPU, - TRITONSERVER_INSTANCEGROUPKIND_GPU, - TRITONSERVER_INSTANCEGROUPKIND_MODEL -} TRITONSERVER_InstanceGroupKind; - -/// Get the string representation of an instance-group kind. The -/// returned string is not owned by the caller and so should not be -/// modified or freed. -/// -/// \param kind The instance-group kind. 
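// --- Editorial sketch (not part of the original header): the parameter-object
// lifecycle documented above. The caller owns the object returned by
// TRITONSERVER_ParameterNew and must delete it; a 64-bit integer value is
// assumed here for the INT parameter type.
#include <stdint.h>
#include "triton/core/tritonserver.h"

static void ExampleParameter(void)
{
  int64_t priority = 5;
  TRITONSERVER_Parameter* p = TRITONSERVER_ParameterNew(
      "priority", TRITONSERVER_PARAMETER_INT, &priority);
  if (p != NULL) {
    // ... attach the parameter to a request or use it as needed ...
    TRITONSERVER_ParameterDelete(p);
  }
}
// --- End of editorial sketch.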
-/// \return The string representation of the kind. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_InstanceGroupKindString( - TRITONSERVER_InstanceGroupKind kind); - -/// TRITONSERVER_Logging -/// -/// Types/levels of logging. -/// -typedef enum TRITONSERVER_loglevel_enum { - TRITONSERVER_LOG_INFO, - TRITONSERVER_LOG_WARN, - TRITONSERVER_LOG_ERROR, - TRITONSERVER_LOG_VERBOSE -} TRITONSERVER_LogLevel; - -/// -/// Format of logging. -/// -/// TRITONSERVER_LOG_DEFAULT: the log severity (L) and timestamp will be -/// logged as "LMMDD hh:mm:ss.ssssss". -/// -/// TRITONSERVER_LOG_ISO8601: the log format will be "YYYY-MM-DDThh:mm:ssZ L". -/// -typedef enum TRITONSERVER_logformat_enum { - TRITONSERVER_LOG_DEFAULT, - TRITONSERVER_LOG_ISO8601 -} TRITONSERVER_LogFormat; - -/// Is a log level enabled? -/// -/// \param level The log level. -/// \return True if the log level is enabled, false if not enabled. -TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled( - TRITONSERVER_LogLevel level); - -/// Log a message at a given log level if that level is enabled. -/// -/// \param level The log level. -/// \param filename The file name of the location of the log message. -/// \param line The line number of the log message. -/// \param msg The log message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_LogMessage( - TRITONSERVER_LogLevel level, const char* filename, const int line, - const char* msg); - -/// TRITONSERVER_Error -/// -/// Errors are reported by a TRITONSERVER_Error object. A NULL -/// TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error -/// indicates error and the code and message for the error can be -/// retrieved from the object. -/// -/// The caller takes ownership of a TRITONSERVER_Error object returned by -/// the API and must call TRITONSERVER_ErrorDelete to release the object. -/// - -/// The TRITONSERVER_Error error codes -typedef enum TRITONSERVER_errorcode_enum { - TRITONSERVER_ERROR_UNKNOWN, - TRITONSERVER_ERROR_INTERNAL, - TRITONSERVER_ERROR_NOT_FOUND, - TRITONSERVER_ERROR_INVALID_ARG, - TRITONSERVER_ERROR_UNAVAILABLE, - TRITONSERVER_ERROR_UNSUPPORTED, - TRITONSERVER_ERROR_ALREADY_EXISTS -} TRITONSERVER_Error_Code; - -/// Create a new error object. The caller takes ownership of the -/// TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to -/// release the object. -/// -/// \param code The error code. -/// \param msg The error message. -/// \return A new TRITONSERVER_Error object. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ErrorNew( - TRITONSERVER_Error_Code code, const char* msg); - -/// Delete an error object. -/// -/// \param error The error object. -TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete(TRITONSERVER_Error* error); - -/// Get the error code. -/// -/// \param error The error object. -/// \return The error code. -TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code -TRITONSERVER_ErrorCode(TRITONSERVER_Error* error); - -/// Get the string representation of an error code. The returned -/// string is not owned by the caller and so should not be modified or -/// freed. The lifetime of the returned string extends only as long as -/// 'error' and must not be accessed once 'error' is deleted. -/// -/// \param error The error object. -/// \return The string representation of the error code. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_ErrorCodeString( - TRITONSERVER_Error* error); - -/// Get the error message. 
The returned string is not owned by the -/// caller and so should not be modified or freed. The lifetime of the -/// returned string extends only as long as 'error' and must not be -/// accessed once 'error' is deleted. -/// -/// \param error The error object. -/// \return The error message. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_ErrorMessage( - TRITONSERVER_Error* error); - -/// TRITONSERVER_ResponseAllocator -/// -/// Object representing a memory allocator for output tensors in an -/// inference response. -/// - -/// Type for allocation function that allocates a buffer to hold an -/// output tensor. -/// -/// \param allocator The allocator that is provided in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// \param tensor_name The name of the output tensor to allocate for. -/// \param byte_size The size of the buffer to allocate. -/// \param memory_type The type of memory that the caller prefers for -/// the buffer allocation. -/// \param memory_type_id The ID of the memory that the caller prefers -/// for the buffer allocation. -/// \param userp The user data pointer that is provided as -/// 'response_allocator_userp' in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// \param buffer Returns a pointer to the allocated memory. -/// \param buffer_userp Returns a user-specified value to associate -/// with the buffer, or nullptr if no user-specified value should be -/// associated with the buffer. This value will be provided in the -/// call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer -/// is released and will also be returned by -/// TRITONSERVER_InferenceResponseOutput. -/// \param actual_memory_type Returns the type of memory where the -/// allocation resides. May be different than the type of memory -/// requested by 'memory_type'. -/// \param actual_memory_type_id Returns the ID of the memory where -/// the allocation resides. May be different than the ID of the memory -/// requested by 'memory_type_id'. -/// \return a TRITONSERVER_Error object if a failure occurs while -/// attempting an allocation. If an error is returned all other return -/// values will be ignored. -typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorAllocFn_t)( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp, - TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id); - -/// Type for allocation function that allocates a buffer to hold an -/// output tensor with buffer attributes. The callback function must fill in the -/// appropriate buffer attributes information related to this buffer. If set, -/// this function is always called after TRITONSERVER_ResponseAllocatorAllocFn_t -/// function. -/// -/// \param allocator The allocator that is provided in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// \param tensor_name The name of the output tensor to allocate for. -/// \param buffer_attributes The buffer attributes associated with the buffer. -/// \param userp The user data pointer that is provided as -/// 'response_allocator_userp' in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// \param buffer_userp Returns a user-specified value to associate -/// with the buffer, or nullptr if no user-specified value should be -/// associated with the buffer. 
This value will be provided in the
-/// call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer
-/// is released and will also be returned by
-/// TRITONSERVER_InferenceResponseOutput.
-/// \return a TRITONSERVER_Error object if a failure occurs while
-/// attempting an allocation. If an error is returned all other return
-/// values will be ignored.
-typedef TRITONSERVER_Error* (
-    *TRITONSERVER_ResponseAllocatorBufferAttributesFn_t)(
-    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
-    TRITONSERVER_BufferAttributes* buffer_attributes, void* userp,
-    void* buffer_userp);
-
-/// Type for function that is called to query the allocator's preferred memory
-/// type and memory type ID. As much as possible, the allocator should attempt
-/// to return the same memory_type and memory_type_id values that will be
-/// returned by the subsequent call to TRITONSERVER_ResponseAllocatorAllocFn_t.
-/// But the allocator is not required to do so.
-///
-/// \param allocator The allocator that is provided in the call to
-/// TRITONSERVER_InferenceRequestSetResponseCallback.
-/// \param userp The user data pointer that is provided as
-/// 'response_allocator_userp' in the call to
-/// TRITONSERVER_InferenceRequestSetResponseCallback.
-/// \param tensor_name The name of the output tensor. This is optional
-/// and it should be set to nullptr to indicate that the tensor name has
-/// not been determined.
-/// \param byte_size The expected size of the buffer. This is optional
-/// and it should be set to nullptr to indicate that the byte size has
-/// not been determined.
-/// \param memory_type Acts as both input and output. On input gives
-/// the memory type preferred by the caller. Returns the memory type
-/// preferred by the allocator, taking into account the caller's
-/// preferred type.
-/// \param memory_type_id Acts as both input and output. On input gives
-/// the memory type ID preferred by the caller. Returns the memory type ID
-/// preferred by the allocator, taking into account the caller's
-/// preferred type ID.
-/// \return a TRITONSERVER_Error object if a failure occurs.
-typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorQueryFn_t)(
-    TRITONSERVER_ResponseAllocator* allocator, void* userp,
-    const char* tensor_name, size_t* byte_size,
-    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
-
-/// Type for function that is called when the server no longer holds
-/// any reference to a buffer allocated by
-/// TRITONSERVER_ResponseAllocatorAllocFn_t. In practice this function
-/// is typically called when the response object associated with the
-/// buffer is deleted by TRITONSERVER_InferenceResponseDelete.
-///
-/// \param allocator The allocator that is provided in the call to
-/// TRITONSERVER_InferenceRequestSetResponseCallback.
-/// \param buffer Pointer to the buffer to be freed.
-/// \param buffer_userp The user-specified value associated
-/// with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t.
-/// \param byte_size The size of the buffer.
-/// \param memory_type The type of memory holding the buffer.
-/// \param memory_type_id The ID of the memory holding the buffer.
-/// \return a TRITONSERVER_Error object if a failure occurs while
-/// attempting the release. If an error is returned Triton will not
-/// attempt to release the buffer again.
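As a hedged illustration of the query callback described above (not part of the header being removed), a minimal TRITONSERVER_ResponseAllocatorQueryFn_t implementation might simply report a fixed preference for non-pinned CPU memory; the name ExampleQueryFn is invented for this sketch.

```cpp
// Illustrative query callback: always report a preference for plain CPU
// memory. 'tensor_name' and 'byte_size' may be nullptr and are not needed here.
static TRITONSERVER_Error*
ExampleQueryFn(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  *memory_type = TRITONSERVER_MEMORY_CPU;  // override the caller's preference
  *memory_type_id = 0;
  return nullptr;  // success
}
```

Such a function would be registered with TRITONSERVER_ResponseAllocatorSetQueryFunction, shown further below.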
-typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorReleaseFn_t)( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - -/// Type for function that is called to indicate that subsequent -/// allocation requests will refer to a new response. -/// -/// \param allocator The allocator that is provided in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// \param userp The user data pointer that is provided as -/// 'response_allocator_userp' in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// \return a TRITONSERVER_Error object if a failure occurs. -typedef TRITONSERVER_Error* (*TRITONSERVER_ResponseAllocatorStartFn_t)( - TRITONSERVER_ResponseAllocator* allocator, void* userp); - -/// Create a new response allocator object. -/// -/// The response allocator object is used by Triton to allocate -/// buffers to hold the output tensors in inference responses. Most -/// models generate a single response for each inference request -/// (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of -/// callbacks will be: -/// -/// TRITONSERVER_ServerInferAsync called -/// - start_fn : optional (and typically not required) -/// - alloc_fn : called once for each output tensor in response -/// TRITONSERVER_InferenceResponseDelete called -/// - release_fn: called once for each output tensor in response -/// -/// For models that generate multiple responses for each inference -/// request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be -/// used to determine sets of alloc_fn callbacks that belong to the -/// same response: -/// -/// TRITONSERVER_ServerInferAsync called -/// - start_fn -/// - alloc_fn : called once for each output tensor in response -/// - start_fn -/// - alloc_fn : called once for each output tensor in response -/// ... -/// For each response, TRITONSERVER_InferenceResponseDelete called -/// - release_fn: called once for each output tensor in the response -/// -/// In all cases the start_fn, alloc_fn and release_fn callback -/// functions must be thread-safe. Typically making these functions -/// thread-safe does not require explicit locking. The recommended way -/// to implement these functions is to have each inference request -/// provide a 'response_allocator_userp' object that is unique to that -/// request with TRITONSERVER_InferenceRequestSetResponseCallback. The -/// callback functions then operate only on this unique state. Locking -/// is required only when the callback function needs to access state -/// that is shared across inference requests (for example, a common -/// allocation pool). -/// -/// \param allocator Returns the new response allocator object. -/// \param alloc_fn The function to call to allocate buffers for result -/// tensors. -/// \param release_fn The function to call when the server no longer -/// holds a reference to an allocated buffer. -/// \param start_fn The function to call to indicate that the -/// subsequent 'alloc_fn' calls are for a new response. This callback -/// is optional (use nullptr to indicate that it should not be -/// invoked). -/// \return a TRITONSERVER_Error indicating success or failure. 
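To make the callback ordering above concrete, here is a minimal sketch of an allocator that hands out malloc'd CPU buffers, under the assumption that plain CPU memory is acceptable for every output tensor. The names ExampleAllocFn, ExampleReleaseFn, and ExampleCreateAllocator are invented for the example and are not part of this API.

```cpp
#include <cstdlib>

// Allocate a plain CPU buffer for one output tensor, reporting the actual
// memory type back through 'actual_memory_type'/'actual_memory_type_id'.
static TRITONSERVER_Error*
ExampleAllocFn(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type, int64_t* actual_memory_type_id)
{
  *buffer = (byte_size == 0) ? nullptr : std::malloc(byte_size);
  *buffer_userp = nullptr;  // no per-buffer user state in this sketch
  *actual_memory_type = TRITONSERVER_MEMORY_CPU;
  *actual_memory_type_id = 0;
  if ((byte_size != 0) && (*buffer == nullptr)) {
    return TRITONSERVER_ErrorNew(
        TRITONSERVER_ERROR_UNAVAILABLE, "failed to allocate output buffer");
  }
  return nullptr;  // success
}

// Free a buffer handed out by ExampleAllocFn once Triton drops its reference.
static TRITONSERVER_Error*
ExampleReleaseFn(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  std::free(buffer);
  return nullptr;  // success
}

// Create the allocator object; the optional start_fn is omitted (nullptr).
static TRITONSERVER_Error*
ExampleCreateAllocator(TRITONSERVER_ResponseAllocator** allocator)
{
  return TRITONSERVER_ResponseAllocatorNew(
      allocator, ExampleAllocFn, ExampleReleaseFn, nullptr /* start_fn */);
}
```

Because the callbacks keep no shared mutable state, no locking is needed, which matches the thread-safety guidance above.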
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorNew(
-    TRITONSERVER_ResponseAllocator** allocator,
-    TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn,
-    TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn,
-    TRITONSERVER_ResponseAllocatorStartFn_t start_fn);
-
-/// Set the buffer attributes function for a response allocator object.
-/// The function will be called after alloc_fn to set the buffer attributes
-/// associated with the output buffer.
-///
-/// The thread-safety requirement for buffer_attributes_fn is the same as for
-/// the other allocator callbacks.
-///
-/// \param allocator The response allocator object.
-/// \param buffer_attributes_fn The function to call to get the buffer
-/// attributes information for an allocated buffer.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(
-    TRITONSERVER_ResponseAllocator* allocator,
-    TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn);
-
-/// Set the query function for a response allocator object. The function is
-/// usually called before alloc_fn to determine the allocator's currently
-/// preferred memory type and memory type ID so that execution decisions can
-/// be made accordingly.
-///
-/// The thread-safety requirement for query_fn is the same as for the other
-/// allocator callbacks.
-///
-/// \param allocator The response allocator object.
-/// \param query_fn The function to call to query the allocator's preferred
-/// memory type and memory type ID.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ResponseAllocatorSetQueryFunction(
-    TRITONSERVER_ResponseAllocator* allocator,
-    TRITONSERVER_ResponseAllocatorQueryFn_t query_fn);
-
-/// Delete a response allocator.
-///
-/// \param allocator The response allocator object.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorDelete(
-    TRITONSERVER_ResponseAllocator* allocator);
-
-/// TRITONSERVER_Message
-///
-/// Object representing a Triton Server message.
-///
-
-/// Create a new message object from serialized JSON string.
-///
-/// \param message The message object.
-/// \param base The base of the serialized JSON.
-/// \param byte_size The size, in bytes, of the serialized message.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_MessageNewFromSerializedJson(
-    TRITONSERVER_Message** message, const char* base, size_t byte_size);
-
-/// Delete a message object.
-///
-/// \param message The message object.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageDelete(
-    TRITONSERVER_Message* message);
-
-/// Get the base and size of the buffer containing the serialized
-/// message in JSON format. The buffer is owned by the
-/// TRITONSERVER_Message object and should not be modified or freed by
-/// the caller. The lifetime of the buffer extends only as long as
-/// 'message' and must not be accessed once 'message' is deleted.
-///
-/// \param message The message object.
-/// \param base Returns the base of the serialized message.
-/// \param byte_size Returns the size, in bytes, of the serialized
-/// message.
-/// \return a TRITONSERVER_Error indicating success or failure.
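A short sketch of the message round trip described above, assuming the serialized JSON document is already held in a std::string; ExampleMessageRoundTrip is an invented helper name, not part of the API.

```cpp
#include <string>

// Wrap a serialized JSON string in a TRITONSERVER_Message and read it back.
void ExampleMessageRoundTrip(const std::string& json)
{
  TRITONSERVER_Message* message = nullptr;
  TRITONSERVER_Error* err = TRITONSERVER_MessageNewFromSerializedJson(
      &message, json.data(), json.size());
  if (err != nullptr) {
    // A real caller would log TRITONSERVER_ErrorMessage(err) here.
    TRITONSERVER_ErrorDelete(err);
    return;
  }

  const char* base = nullptr;
  size_t byte_size = 0;
  TRITONSERVER_MessageSerializeToJson(message, &base, &byte_size);
  std::string copy(base, byte_size);  // 'base' is only valid while 'message' lives

  TRITONSERVER_MessageDelete(message);
}
```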
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message* message, const char** base, size_t* byte_size); - -/// TRITONSERVER_Metrics -/// -/// Object representing metrics. -/// - -/// Metric format types -typedef enum tritonserver_metricformat_enum { - TRITONSERVER_METRIC_PROMETHEUS -} TRITONSERVER_MetricFormat; - -/// Delete a metrics object. -/// -/// \param metrics The metrics object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsDelete( - TRITONSERVER_Metrics* metrics); - -/// Get a buffer containing the metrics in the specified format. For -/// each format the buffer contains the following: -/// -/// TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline -/// string (char*) that gives a text representation of the metrics in -/// prometheus format. 'byte_size' returns the length of the string -/// in bytes. -/// -/// The buffer is owned by the 'metrics' object and should not be -/// modified or freed by the caller. The lifetime of the buffer -/// extends only as long as 'metrics' and must not be accessed once -/// 'metrics' is deleted. -/// -/// \param metrics The metrics object. -/// \param format The format to use for the returned metrics. -/// \param base Returns a pointer to the base of the formatted -/// metrics, as described above. -/// \param byte_size Returns the size, in bytes, of the formatted -/// metrics. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics* metrics, TRITONSERVER_MetricFormat format, - const char** base, size_t* byte_size); - -/// TRITONSERVER_InferenceTrace -/// -/// Object that represents tracing for an inference request. -/// - -/// Trace levels. The trace level controls the type of trace -/// activities that are reported for an inference request. -/// -/// Trace level values are power-of-2 and can be combined to trace -/// multiple types of activities. For example, use -/// (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS | -/// TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and -/// tensors for an inference request. -/// -/// TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are -/// deprecated and should not be used. -typedef enum tritonserver_tracelevel_enum { - /// Tracing disabled. No trace activities are reported. - TRITONSERVER_TRACE_LEVEL_DISABLED = 0, - /// Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS. - TRITONSERVER_TRACE_LEVEL_MIN = 1, - /// Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS. - TRITONSERVER_TRACE_LEVEL_MAX = 2, - /// Record timestamps for the inference request. - TRITONSERVER_TRACE_LEVEL_TIMESTAMPS = 0x4, - /// Record input and output tensor values for the inference request. - TRITONSERVER_TRACE_LEVEL_TENSORS = 0x8 -} TRITONSERVER_InferenceTraceLevel; - -/// Get the string representation of a trace level. The returned -/// string is not owned by the caller and so should not be modified or -/// freed. -/// -/// \param level The trace level. -/// \return The string representation of the trace level. 
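For the metrics object above, a hedged sketch of copying out the Prometheus text and releasing the object; the metrics handle is assumed to come from TRITONSERVER_ServerMetrics, which is declared elsewhere in this header, and ExampleMetricsToText is an invented name.

```cpp
#include <string>

// Copy the Prometheus text exposition out of a metrics object, then delete it.
std::string ExampleMetricsToText(TRITONSERVER_Metrics* metrics)
{
  const char* base = nullptr;
  size_t byte_size = 0;
  std::string text;

  TRITONSERVER_Error* err = TRITONSERVER_MetricsFormatted(
      metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size);
  if (err == nullptr) {
    text.assign(base, byte_size);  // copy: 'base' dies with 'metrics'
  } else {
    TRITONSERVER_ErrorDelete(err);
  }

  TRITONSERVER_MetricsDelete(metrics);
  return text;
}
```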
-TRITONSERVER_DECLSPEC const char* TRITONSERVER_InferenceTraceLevelString( - TRITONSERVER_InferenceTraceLevel level); - -/// Trace activities -typedef enum tritonserver_traceactivity_enum { - TRITONSERVER_TRACE_REQUEST_START = 0, - TRITONSERVER_TRACE_QUEUE_START = 1, - TRITONSERVER_TRACE_COMPUTE_START = 2, - TRITONSERVER_TRACE_COMPUTE_INPUT_END = 3, - TRITONSERVER_TRACE_COMPUTE_OUTPUT_START = 4, - TRITONSERVER_TRACE_COMPUTE_END = 5, - TRITONSERVER_TRACE_REQUEST_END = 6, - TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7, - TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8, - TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9 -} TRITONSERVER_InferenceTraceActivity; - -/// Get the string representation of a trace activity. The returned -/// string is not owned by the caller and so should not be modified or -/// freed. -/// -/// \param activity The trace activity. -/// \return The string representation of the trace activity. -TRITONSERVER_DECLSPEC const char* TRITONSERVER_InferenceTraceActivityString( - TRITONSERVER_InferenceTraceActivity activity); - -/// Type for trace timeline activity callback function. This callback function -/// is used to report activity occurring for a trace. This function -/// does not take ownership of 'trace' and so any information needed -/// from that object must be copied before returning. The 'userp' data -/// is the same as what is supplied in the call to -/// TRITONSERVER_InferenceTraceNew. -typedef void (*TRITONSERVER_InferenceTraceActivityFn_t)( - TRITONSERVER_InferenceTrace* trace, - TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, - void* userp); - -/// Type for trace tensor activity callback function. This callback function -/// is used to report tensor activity occurring for a trace. This function -/// does not take ownership of 'trace' and so any information needed -/// from that object must be copied before returning. The 'userp' data -/// is the same as what is supplied in the call to -/// TRITONSERVER_InferenceTraceTensorNew. -typedef void (*TRITONSERVER_InferenceTraceTensorActivityFn_t)( - TRITONSERVER_InferenceTrace* trace, - TRITONSERVER_InferenceTraceActivity activity, const char* name, - TRITONSERVER_DataType datatype, const void* base, size_t byte_size, - const int64_t* shape, uint64_t dim_count, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void* userp); - -/// Type for trace release callback function. This callback function -/// is called when all activity for the trace has completed. The -/// callback function takes ownership of the -/// TRITONSERVER_InferenceTrace object. The 'userp' data is the same -/// as what is supplied in the call to TRITONSERVER_InferenceTraceNew. -typedef void (*TRITONSERVER_InferenceTraceReleaseFn_t)( - TRITONSERVER_InferenceTrace* trace, void* userp); - -/// Create a new inference trace object. The caller takes ownership of -/// the TRITONSERVER_InferenceTrace object and must call -/// TRITONSERVER_InferenceTraceDelete to release the object. -/// -/// The activity callback function will be called to report activity -/// for 'trace' as well as for any child traces that are spawned by -/// 'trace', and so the activity callback must check the trace object -/// to determine specifically what activity is being reported. -/// -/// The release callback is called for both 'trace' and for any child -/// traces spawned by 'trace'. -/// -/// \param trace Returns the new inference trace object. -/// \param level The tracing level. -/// \param parent_id The parent trace id for this trace. 
A value of 0 -/// indicates that there is not parent trace. -/// \param activity_fn The callback function where activity for the -/// trace is reported. -/// \param release_fn The callback function called when all activity -/// is complete for the trace. -/// \param trace_userp User-provided pointer that is delivered to -/// the activity and release callback functions. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceNew( - TRITONSERVER_InferenceTrace** trace, TRITONSERVER_InferenceTraceLevel level, - uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp); - -/// Create a new inference trace object. The caller takes ownership of -/// the TRITONSERVER_InferenceTrace object and must call -/// TRITONSERVER_InferenceTraceDelete to release the object. -/// -/// The timeline and tensor activity callback function will be called to report -/// activity for 'trace' as well as for any child traces that are spawned by -/// 'trace', and so the activity callback must check the trace object -/// to determine specifically what activity is being reported. -/// -/// The release callback is called for both 'trace' and for any child -/// traces spawned by 'trace'. -/// -/// \param trace Returns the new inference trace object. -/// \param level The tracing level. -/// \param parent_id The parent trace id for this trace. A value of 0 -/// indicates that there is not parent trace. -/// \param activity_fn The callback function where timeline activity for the -/// trace is reported. -/// \param tensor_activity_fn The callback function where tensor activity for -/// the trace is reported. -/// \param release_fn The callback function called when all activity -/// is complete for the trace. -/// \param trace_userp User-provided pointer that is delivered to -/// the activity and release callback functions. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceTensorNew( - TRITONSERVER_InferenceTrace** trace, TRITONSERVER_InferenceTraceLevel level, - uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp); - -/// Delete a trace object. -/// -/// \param trace The trace object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceDelete( - TRITONSERVER_InferenceTrace* trace); - -/// Get the id associated with a trace. Every trace is assigned an id -/// that is unique across all traces created for a Triton server. -/// -/// \param trace The trace. -/// \param id Returns the id associated with the trace. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceId( - TRITONSERVER_InferenceTrace* trace, uint64_t* id); - -/// Get the parent id associated with a trace. The parent id indicates -/// a parent-child relationship between two traces. A parent id value -/// of 0 indicates that there is no parent trace. -/// -/// \param trace The trace. -/// \param id Returns the parent id associated with the trace. -/// \return a TRITONSERVER_Error indicating success or failure. 
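A sketch of creating a timestamp-level trace with the two callbacks described above. The callback bodies and the names ExampleTraceActivityFn, ExampleTraceReleaseFn, and ExampleNewTrace are illustrative only; a real integration would attach the trace to a request submitted with TRITONSERVER_ServerInferAsync.

```cpp
#include <cstdint>
#include <cstdio>

// Report each timeline activity for the trace (and any child traces).
static void
ExampleTraceActivityFn(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    void* userp)
{
  uint64_t id = 0;
  TRITONSERVER_InferenceTraceId(trace, &id);
  std::printf(
      "trace %llu: %s at %llu ns\n", (unsigned long long)id,
      TRITONSERVER_InferenceTraceActivityString(activity),
      (unsigned long long)timestamp_ns);
}

// Called once all activity is complete; ownership of 'trace' passes here.
static void
ExampleTraceReleaseFn(TRITONSERVER_InferenceTrace* trace, void* userp)
{
  TRITONSERVER_InferenceTraceDelete(trace);
}

// Create a root trace (parent_id 0) that records timestamps only.
static TRITONSERVER_Error*
ExampleNewTrace(TRITONSERVER_InferenceTrace** trace)
{
  return TRITONSERVER_InferenceTraceNew(
      trace, TRITONSERVER_TRACE_LEVEL_TIMESTAMPS, 0 /* no parent */,
      ExampleTraceActivityFn, ExampleTraceReleaseFn, nullptr /* trace_userp */);
}
```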
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceParentId( - TRITONSERVER_InferenceTrace* trace, uint64_t* parent_id); - -/// Get the name of the model associated with a trace. The caller does -/// not own the returned string and must not modify or delete it. The -/// lifetime of the returned string extends only as long as 'trace'. -/// -/// \param trace The trace. -/// \param model_name Returns the name of the model associated with -/// the trace. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelName( - TRITONSERVER_InferenceTrace* trace, const char** model_name); - -/// Get the version of the model associated with a trace. -/// -/// \param trace The trace. -/// \param model_version Returns the version of the model associated -/// with the trace. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceModelVersion( - TRITONSERVER_InferenceTrace* trace, int64_t* model_version); - -/// TRITONSERVER_InferenceRequest -/// -/// Object representing an inference request. The inference request -/// provides the meta-data and input tensor values needed for an -/// inference and returns the inference result meta-data and output -/// tensors. An inference request object can be modified and reused -/// multiple times. -/// - -/// Inference request flags. The enum values must be power-of-2 values. -typedef enum tritonserver_requestflag_enum { - TRITONSERVER_REQUEST_FLAG_SEQUENCE_START = 1, - TRITONSERVER_REQUEST_FLAG_SEQUENCE_END = 2 -} TRITONSERVER_RequestFlag; - -/// Inference request release flags. The enum values must be -/// power-of-2 values. -typedef enum tritonserver_requestreleaseflag_enum { - TRITONSERVER_REQUEST_RELEASE_ALL = 1 -} TRITONSERVER_RequestReleaseFlag; - -/// Inference response complete flags. The enum values must be -/// power-of-2 values. -typedef enum tritonserver_responsecompleteflag_enum { - TRITONSERVER_RESPONSE_COMPLETE_FINAL = 1 -} TRITONSERVER_ResponseCompleteFlag; - -/// Type for inference request release callback function. The callback -/// indicates what type of release is being performed on the request -/// and for some of these the callback function takes ownership of the -/// TRITONSERVER_InferenceRequest object. The 'userp' data is the data -/// provided as 'request_release_userp' in the call to -/// TRITONSERVER_InferenceRequestSetReleaseCallback. -/// -/// One or more flags will be specified when the callback is invoked, -/// and the callback must take the following actions: -/// -/// - TRITONSERVER_REQUEST_RELEASE_ALL: The entire inference request -/// is being released and ownership is passed to the callback -/// function. Triton will not longer access the 'request' object -/// itself nor any input tensor data associated with the -/// request. The callback should free or otherwise manage the -/// 'request' object and all associated tensor data. -/// -/// Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always -/// be set when the callback is invoked but in the future that may -/// change, so the callback should explicitly check for the flag -/// before taking ownership of the request object. -/// -typedef void (*TRITONSERVER_InferenceRequestReleaseFn_t)( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp); - -/// Type for callback function indicating that an inference response -/// has completed. 
The callback function takes ownership of the -/// TRITONSERVER_InferenceResponse object. The 'userp' data is the -/// data provided as 'response_userp' in the call to -/// TRITONSERVER_InferenceRequestSetResponseCallback. -/// -/// One or more flags may be specified when the callback is invoked: -/// -/// - TRITONSERVER_RESPONSE_COMPLETE_FINAL: Indicates that no more -/// responses will be generated for a given request (more -/// specifically, that no more responses will be generated for the -/// inference request that set this callback and 'userp'). When -/// this flag is set 'response' may be a response object or may be -/// nullptr. If 'response' is not nullptr, then 'response' is the -/// last response that Triton will produce for the request. If -/// 'response' is nullptr then Triton is indicating that no more -/// responses will be produced for the request. -typedef void (*TRITONSERVER_InferenceResponseCompleteFn_t)( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, - void* userp); - -/// Create a new inference request object. -/// -/// \param inference_request Returns the new request object. -/// \param server the inference server object. -/// \param model_name The name of the model to use for the request. -/// \param model_version The version of the model to use for the -/// request. If -1 then the server will choose a version based on the -/// model's policy. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestNew( - TRITONSERVER_InferenceRequest** inference_request, - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version); - -/// Delete an inference request object. -/// -/// \param inference_request The request object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestDelete( - TRITONSERVER_InferenceRequest* inference_request); - -/// Get the ID for a request. The returned ID is owned by -/// 'inference_request' and must not be modified or freed by the -/// caller. -/// -/// \param inference_request The request object. -/// \param id Returns the ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestId( - TRITONSERVER_InferenceRequest* inference_request, const char** id); - -/// Set the ID for a request. -/// -/// \param inference_request The request object. -/// \param id The ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetId( - TRITONSERVER_InferenceRequest* inference_request, const char* id); - -/// Get the flag(s) associated with a request. On return 'flags' holds -/// a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for -/// available flags. -/// -/// \param inference_request The request object. -/// \param flags Returns the flags. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestFlags( - TRITONSERVER_InferenceRequest* inference_request, uint32_t* flags); - -/// Set the flag(s) associated with a request. 'flags' should hold a -/// bitwise-or of all flag values, see TRITONSERVER_RequestFlag for -/// available flags. -/// -/// \param inference_request The request object. -/// \param flags The flags. -/// \return a TRITONSERVER_Error indicating success or failure. 
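A minimal sketch of creating and naming a request; the model name "my_model" and the helper name ExampleNewRequest are placeholders, and the server handle is assumed to have been created elsewhere.

```cpp
// Create a request against the latest version of a (hypothetical) model.
TRITONSERVER_Error*
ExampleNewRequest(
    TRITONSERVER_Server* server, TRITONSERVER_InferenceRequest** request)
{
  TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestNew(
      request, server, "my_model", -1 /* server picks the version */);
  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestSetId(*request, "request-0");
  }
  return err;
}
```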
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags( - TRITONSERVER_InferenceRequest* inference_request, uint32_t flags); - -/// Get the correlation ID of the inference request as an unsigned integer. -/// Default is 0, which indicates that the request has no correlation ID. -/// If the correlation id associated with the inference request is a string, -/// this function will return a failure. The correlation ID is used -/// to indicate two or more inference request are related to each other. -/// How this relationship is handled by the inference server is determined by -/// the model's scheduling policy. -/// -/// \param inference_request The request object. -/// \param correlation_id Returns the correlation ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestCorrelationId( - TRITONSERVER_InferenceRequest* inference_request, uint64_t* correlation_id); - -/// Get the correlation ID of the inference request as a string. -/// Default is empty "", which indicates that the request has no correlation ID. -/// If the correlation id associated with the inference request is an unsigned -/// integer, then this function will return a failure. The correlation ID -/// is used to indicate two or more inference request are related to each other. -/// How this relationship is handled by the inference server is determined by -/// the model's scheduling policy. -/// -/// \param inference_request The request object. -/// \param correlation_id Returns the correlation ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestCorrelationIdString( - TRITONSERVER_InferenceRequest* inference_request, - const char** correlation_id); - -/// Set the correlation ID of the inference request to be an unsigned integer. -/// Default is 0, which indicates that the request has no correlation ID. -/// The correlation ID is used to indicate two or more inference request -/// are related to each other. How this relationship is handled by the -/// inference server is determined by the model's scheduling policy. -/// -/// \param inference_request The request object. -/// \param correlation_id The correlation ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetCorrelationId( - TRITONSERVER_InferenceRequest* inference_request, uint64_t correlation_id); - -/// Set the correlation ID of the inference request to be a string. -/// The correlation ID is used to indicate two or more inference -/// request are related to each other. How this relationship is -/// handled by the inference server is determined by the model's -/// scheduling policy. -/// -/// \param inference_request The request object. -/// \param correlation_id The correlation ID. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetCorrelationIdString( - TRITONSERVER_InferenceRequest* inference_request, - const char* correlation_id); - -/// Get the priority for a request. The default is 0 indicating that -/// the request does not specify a priority and so will use the -/// model's default priority. -/// -/// \param inference_request The request object. -/// \param priority Returns the priority level. -/// \return a TRITONSERVER_Error indicating success or failure. 
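For sequence models, the correlation ID ties the requests of one sequence together and the request flags mark its boundaries. A sketch using the flags enum shown earlier; the sequence id 42 and the name ExampleStartSequence are arbitrary.

```cpp
// Mark 'request' as the first request of sequence 42.
TRITONSERVER_Error*
ExampleStartSequence(TRITONSERVER_InferenceRequest* request)
{
  TRITONSERVER_Error* err =
      TRITONSERVER_InferenceRequestSetCorrelationId(request, 42);
  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestSetFlags(
        request, TRITONSERVER_REQUEST_FLAG_SEQUENCE_START);
  }
  return err;
}
```

The last request of the sequence would instead set TRITONSERVER_REQUEST_FLAG_SEQUENCE_END.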
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriority( - TRITONSERVER_InferenceRequest* inference_request, uint32_t* priority); - -/// Set the priority for a request. The default is 0 indicating that -/// the request does not specify a priority and so will use the -/// model's default priority. -/// -/// \param inference_request The request object. -/// \param priority The priority level. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetPriority( - TRITONSERVER_InferenceRequest* inference_request, uint32_t priority); - -/// Get the timeout for a request, in microseconds. The default is 0 -/// which indicates that the request has no timeout. -/// -/// \param inference_request The request object. -/// \param timeout_us Returns the timeout, in microseconds. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestTimeoutMicroseconds( - TRITONSERVER_InferenceRequest* inference_request, uint64_t* timeout_us); - -/// Set the timeout for a request, in microseconds. The default is 0 -/// which indicates that the request has no timeout. -/// -/// \param inference_request The request object. -/// \param timeout_us The timeout, in microseconds. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( - TRITONSERVER_InferenceRequest* inference_request, uint64_t timeout_us); - -/// Add an input to a request. -/// -/// \param inference_request The request object. -/// \param name The name of the input. -/// \param datatype The type of the input. Valid type names are BOOL, -/// UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, -/// FP32, FP64, and BYTES. -/// \param shape The shape of the input. -/// \param dim_count The number of dimensions of 'shape'. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const TRITONSERVER_DataType datatype, const int64_t* shape, - uint64_t dim_count); - -/// Add a raw input to a request. The name recognized by the model, data type -/// and shape of the input will be deduced from model configuration. -/// This function must be called at most once on request with no other input to -/// ensure the deduction is accurate. -/// -/// \param inference_request The request object. -/// \param name The name of the input. This name is only used as a reference -/// of the raw input in other Tritonserver APIs. It doesn't assoicate with the -/// name used in the model. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAddRawInput( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - -/// Remove an input from a request. -/// -/// \param inference_request The request object. -/// \param name The name of the input. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveInput( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - -/// Remove all inputs from a request. -/// -/// \param inference_request The request object. -/// \return a TRITONSERVER_Error indicating success or failure. 
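A sketch of declaring a single FP32 input of shape [1, 3] and bounding the request with a timeout. TRITONSERVER_TYPE_FP32 is one of the TRITONSERVER_DataType values defined earlier in this header; the input name "INPUT0", the timeout value, and ExampleDescribeInput are placeholders.

```cpp
#include <cstdint>

TRITONSERVER_Error*
ExampleDescribeInput(TRITONSERVER_InferenceRequest* request)
{
  const int64_t shape[] = {1, 3};
  TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestAddInput(
      request, "INPUT0", TRITONSERVER_TYPE_FP32, shape, 2 /* dim_count */);
  if (err == nullptr) {
    // 0 means "no timeout"; any non-zero value is in microseconds.
    err = TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(request, 500000);
  }
  return err;
}
```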
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveAllInputs( - TRITONSERVER_InferenceRequest* inference_request); - -/// Assign a buffer of data to an input. The buffer will be appended -/// to any existing buffers for that input. The 'inference_request' -/// object takes ownership of the buffer and so the caller should not -/// modify or free the buffer until that ownership is released by -/// 'inference_request' being deleted or by the input being removed -/// from 'inference_request'. -/// -/// \param inference_request The request object. -/// \param name The name of the input. -/// \param base The base address of the input data. -/// \param byte_size The size, in bytes, of the input data. -/// \param memory_type The memory type of the input data. -/// \param memory_type_id The memory type id of the input data. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAppendInputData( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - -/// Assign a buffer of data to an input for execution on all model instances -/// with the specified host policy. The buffer will be appended to any existing -/// buffers for that input on all devices with this host policy. The -/// 'inference_request' object takes ownership of the buffer and so the caller -/// should not modify or free the buffer until that ownership is released by -/// 'inference_request' being deleted or by the input being removed from -/// 'inference_request'. If the execution is scheduled on a device that does not -/// have a input buffer specified using this function, then the input buffer -/// specified with TRITONSERVER_InferenceRequestAppendInputData will be used so -/// a non-host policy specific version of data must be added using that API. -/// \param inference_request The request object. -/// \param name The name of the input. -/// \param base The base address of the input data. -/// \param byte_size The size, in bytes, of the input data. -/// \param memory_type The memory type of the input data. -/// \param memory_type_id The memory type id of the input data. -/// \param host_policy_name All model instances executing with this host_policy -/// will use this input buffer for execution. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, const char* host_policy_name); - -/// Assign a buffer of data to an input. The buffer will be appended -/// to any existing buffers for that input. The 'inference_request' -/// object takes ownership of the buffer and so the caller should not -/// modify or free the buffer until that ownership is released by -/// 'inference_request' being deleted or by the input being removed -/// from 'inference_request'. -/// -/// \param inference_request The request object. -/// \param name The name of the input. -/// \param base The base address of the input data. -/// \param buffer_attributes The buffer attrubutes of the input. -/// \return a TRITONSERVER_Error indicating success or failure. 
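Continuing the input sketch above, the tensor contents are attached as one or more buffers. Because the request takes ownership of the buffer until the input (or the request) is removed, this illustrative helper keeps the data in a static array; ExampleAttachInputData is an invented name.

```cpp
TRITONSERVER_Error*
ExampleAttachInputData(TRITONSERVER_InferenceRequest* request)
{
  // Must stay valid until the input data is removed or the request is deleted.
  static const float input0_data[3] = {0.0f, 1.0f, 2.0f};

  return TRITONSERVER_InferenceRequestAppendInputData(
      request, "INPUT0", input0_data, sizeof(input0_data),
      TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */);
}
```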
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, TRITONSERVER_BufferAttributes* buffer_attributes); - -/// Clear all input data from an input, releasing ownership of the -/// buffer(s) that were appended to the input with -/// TRITONSERVER_InferenceRequestAppendInputData or -/// TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy -/// \param inference_request The request object. -/// \param name The name of the input. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveAllInputData( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - -/// Add an output request to an inference request. -/// -/// \param inference_request The request object. -/// \param name The name of the output. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAddRequestedOutput( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - -/// Remove an output request from an inference request. -/// -/// \param inference_request The request object. -/// \param name The name of the output. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveRequestedOutput( - TRITONSERVER_InferenceRequest* inference_request, const char* name); - -/// Remove all output requests from an inference request. -/// -/// \param inference_request The request object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( - TRITONSERVER_InferenceRequest* inference_request); - -/// Set the release callback for an inference request. The release -/// callback is called by Triton to return ownership of the request -/// object. -/// -/// \param inference_request The request object. -/// \param request_release_fn The function called to return ownership -/// of the 'inference_request' object. -/// \param request_release_userp User-provided pointer that is -/// delivered to the 'request_release_fn' callback. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetReleaseCallback( - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, - void* request_release_userp); - -/// Set the allocator and response callback for an inference -/// request. The allocator is used to allocate buffers for any output -/// tensors included in responses that are produced for this -/// request. The response callback is called to return response -/// objects representing responses produced for this request. -/// -/// \param inference_request The request object. -/// \param response_allocator The TRITONSERVER_ResponseAllocator to use -/// to allocate buffers to hold inference results. -/// \param response_allocator_userp User-provided pointer that is -/// delivered to the response allocator's start and allocation functions. -/// \param response_fn The function called to deliver an inference -/// response for this request. -/// \param response_userp User-provided pointer that is delivered to -/// the 'response_fn' callback. -/// \return a TRITONSERVER_Error indicating success or failure. 
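Putting the two callbacks together: a sketch that returns request ownership to the caller and frees each response as it arrives. The request would then be submitted with TRITONSERVER_ServerInferAsync, declared elsewhere in this header; the allocator is assumed to be the one sketched earlier, and the Example* names are invented.

```cpp
// Reclaim the request once Triton releases it back to us.
static void
ExampleRequestRelease(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) {
    TRITONSERVER_InferenceRequestDelete(request);
  }
}

// Free each response; a real implementation would read the outputs first.
static void
ExampleResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response != nullptr) {
    TRITONSERVER_InferenceResponseDelete(response);
  }
}

TRITONSERVER_Error*
ExampleSetCallbacks(
    TRITONSERVER_InferenceRequest* request,
    TRITONSERVER_ResponseAllocator* allocator)
{
  TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestSetReleaseCallback(
      request, ExampleRequestRelease, nullptr /* request_release_userp */);
  if (err == nullptr) {
    err = TRITONSERVER_InferenceRequestSetResponseCallback(
        request, allocator, nullptr /* response_allocator_userp */,
        ExampleResponseComplete, nullptr /* response_userp */);
  }
  return err;
}
```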
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetResponseCallback( - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_ResponseAllocator* response_allocator, - void* response_allocator_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp); - -/// TRITONSERVER_InferenceResponse -/// -/// Object representing an inference response. The inference response -/// provides the meta-data and output tensor values calculated by the -/// inference. -/// - -/// Delete an inference response object. -/// -/// \param inference_response The response object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseDelete( - TRITONSERVER_InferenceResponse* inference_response); - -/// Return the error status of an inference response. Return a -/// TRITONSERVER_Error object on failure, return nullptr on success. -/// The returned error object is owned by 'inference_response' and so -/// should not be deleted by the caller. -/// -/// \param inference_response The response object. -/// \return a TRITONSERVER_Error indicating the success or failure -/// status of the response. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseError( - TRITONSERVER_InferenceResponse* inference_response); - -/// Get model used to produce a response. The caller does not own the -/// returned model name value and must not modify or delete it. The -/// lifetime of all returned values extends until 'inference_response' -/// is deleted. -/// -/// \param inference_response The response object. -/// \param model_name Returns the name of the model. -/// \param model_version Returns the version of the model. -/// this response. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseModel( - TRITONSERVER_InferenceResponse* inference_response, const char** model_name, - int64_t* model_version); - -/// Get the ID of the request corresponding to a response. The caller -/// does not own the returned ID and must not modify or delete it. The -/// lifetime of all returned values extends until 'inference_response' -/// is deleted. -/// -/// \param inference_response The response object. -/// \param request_id Returns the ID of the request corresponding to -/// this response. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseId( - TRITONSERVER_InferenceResponse* inference_response, - const char** request_id); - -/// Get the number of parameters available in the response. -/// -/// \param inference_response The response object. -/// \param count Returns the number of parameters. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseParameterCount( - TRITONSERVER_InferenceResponse* inference_response, uint32_t* count); - -/// Get all information about a parameter. The caller does not own any -/// of the returned values and must not modify or delete them. The -/// lifetime of all returned values extends until 'inference_response' -/// is deleted. -/// -/// The 'vvalue' returns a void* pointer that must be cast -/// appropriately based on 'type'. 
For example:
-///
-///    void* vvalue;
-///    TRITONSERVER_ParameterType type;
-///    TRITONSERVER_InferenceResponseParameter(
-///                     response, index, &name, &type, &vvalue);
-///    switch (type) {
-///      case TRITONSERVER_PARAMETER_BOOL:
-///        bool value = *(reinterpret_cast<bool*>(vvalue));
-///        ...
-///      case TRITONSERVER_PARAMETER_INT:
-///        int64_t value = *(reinterpret_cast<int64_t*>(vvalue));
-///        ...
-///      case TRITONSERVER_PARAMETER_STRING:
-///        const char* value = reinterpret_cast<const char*>(vvalue);
-///        ...
-///
-/// \param inference_response The response object.
-/// \param index The index of the parameter, must be 0 <= index <
-/// count, where 'count' is the value returned by
-/// TRITONSERVER_InferenceResponseParameterCount.
-/// \param name Returns the name of the parameter.
-/// \param type Returns the type of the parameter.
-/// \param vvalue Returns a pointer to the parameter value.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_InferenceResponseParameter(
-    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
-    const char** name, TRITONSERVER_ParameterType* type, const void** vvalue);
-
-/// Get the number of outputs available in the response.
-///
-/// \param inference_response The response object.
-/// \param count Returns the number of output tensors.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_InferenceResponseOutputCount(
-    TRITONSERVER_InferenceResponse* inference_response, uint32_t* count);
-
-/// Get all information about an output tensor. The tensor data is
-/// returned as the base pointer to the data and the size, in bytes,
-/// of the data. The caller does not own any of the returned values
-/// and must not modify or delete them. The lifetime of all returned
-/// values extends until 'inference_response' is deleted.
-///
-/// \param inference_response The response object.
-/// \param index The index of the output tensor, must be 0 <= index <
-/// count, where 'count' is the value returned by
-/// TRITONSERVER_InferenceResponseOutputCount.
-/// \param name Returns the name of the output.
-/// \param datatype Returns the type of the output.
-/// \param shape Returns the shape of the output.
-/// \param dim_count Returns the number of dimensions of the returned
-/// shape.
-/// \param base Returns the tensor data for the output.
-/// \param byte_size Returns the size, in bytes, of the data.
-/// \param memory_type Returns the memory type of the data.
-/// \param memory_type_id Returns the memory type id of the data.
-/// \param userp The user-specified value associated with the buffer
-/// in TRITONSERVER_ResponseAllocatorAllocFn_t.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutput(
-    TRITONSERVER_InferenceResponse* inference_response, const uint32_t index,
-    const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape,
-    uint64_t* dim_count, const void** base, size_t* byte_size,
-    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
-    void** userp);
-
-/// Get a classification label associated with an output for a given
-/// index. The caller does not own the returned label and must not
-/// modify or delete it. The lifetime of the returned label extends
-/// until 'inference_response' is deleted.
-///
-/// \param inference_response The response object.
-/// \param index The index of the output tensor, must be 0 <= index < -/// count, where 'count' is the value returned by -/// TRITONSERVER_InferenceResponseOutputCount. -/// \param class_index The index of the class. -/// \param name Returns the label corresponding to 'class_index' or -/// nullptr if no label. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseOutputClassificationLabel( - TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, - const size_t class_index, const char** label); - -/// TRITONSERVER_BufferAttributes -/// -/// API to create, modify, or retrieve attributes associated with a buffer. -/// - -/// Create a new buffer attributes object. The caller takes ownership of -/// the TRITONSERVER_BufferAttributes object and must call -/// TRITONSERVER_BufferAttributesDelete to release the object. -/// -/// \param buffer_attributes Returns the new buffer attributes object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesNew( - TRITONSERVER_BufferAttributes** buffer_attributes); - -/// Delete a buffer attributes object. -/// -/// \param buffer_attributes The buffer_attributes object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesDelete( - TRITONSERVER_BufferAttributes* buffer_attributes); - -/// Set the memory type id field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param memory_type_id Memory type id to assign to the buffer attributes -/// object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetMemoryTypeId( - TRITONSERVER_BufferAttributes* buffer_attributes, int64_t memory_type_id); - -/// Set the memory type field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param memory_type Memory type to assign to the buffer attributes object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetMemoryType( - TRITONSERVER_BufferAttributes* buffer_attributes, - TRITONSERVER_MemoryType memory_type); - -/// Set the CudaIpcHandle field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param cuda_ipc_handle The CudaIpcHandle to assign to the buffer attributes -/// object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetCudaIpcHandle( - TRITONSERVER_BufferAttributes* buffer_attributes, void* cuda_ipc_handle); - -/// Set the byte size field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param byte_size Byte size to assign to the buffer attributes object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetByteSize( - TRITONSERVER_BufferAttributes* buffer_attributes, size_t byte_size); - -/// Get the memory type id field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param memory_type_id Returns the memory type id associated with the buffer -/// attributes object. 
-/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesMemoryTypeId( - TRITONSERVER_BufferAttributes* buffer_attributes, int64_t* memory_type_id); - -/// Get the memory type field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param memory_type Returns the memory type associated with the buffer -/// attributes object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesMemoryType( - TRITONSERVER_BufferAttributes* buffer_attributes, - TRITONSERVER_MemoryType* memory_type); - -/// Get the CudaIpcHandle field of the buffer attributes object. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param cuda_ipc_handle Returns the memory type associated with the buffer -/// attributes object. If the cudaIpcHandle does not exist for the buffer, -/// nullptr will be returned. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesCudaIpcHandle( - TRITONSERVER_BufferAttributes* buffer_attributes, void** cuda_ipc_handle); - -/// Get the byte size field of the buffer attributes. -/// -/// \param buffer_attributes The buffer attributes object. -/// \param byte_size Returns the byte size associated with the buffer attributes -/// object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesByteSize( - TRITONSERVER_BufferAttributes* buffer_attributes, size_t* byte_size); - - -/// TRITONSERVER_ServerOptions -/// -/// Options to use when creating an inference server. -/// - -/// Model control modes -typedef enum tritonserver_modelcontrolmode_enum { - TRITONSERVER_MODEL_CONTROL_NONE, - TRITONSERVER_MODEL_CONTROL_POLL, - TRITONSERVER_MODEL_CONTROL_EXPLICIT -} TRITONSERVER_ModelControlMode; - -/// Rate limit modes -typedef enum tritonserver_ratelimitmode_enum { - TRITONSERVER_RATE_LIMIT_OFF, - TRITONSERVER_RATE_LIMIT_EXEC_COUNT -} TRITONSERVER_RateLimitMode; - -/// Create a new server options object. The caller takes ownership of -/// the TRITONSERVER_ServerOptions object and must call -/// TRITONSERVER_ServerOptionsDelete to release the object. -/// -/// \param options Returns the new server options object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsNew( - TRITONSERVER_ServerOptions** options); - -/// Delete a server options object. -/// -/// \param options The server options object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsDelete( - TRITONSERVER_ServerOptions* options); - -/// Set the textual ID for the server in a server options. The ID is a -/// name that identifies the server. -/// -/// \param options The server options object. -/// \param server_id The server identifier. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetServerId( - TRITONSERVER_ServerOptions* options, const char* server_id); - -/// Set the model repository path in a server options. The path must be -/// the full absolute path to the model repository. This function can be called -/// multiple times with different paths to set multiple model repositories. 
-/// Note that if a model is not unique across all model repositories
-/// at any time, the model will not be available.
-///
-/// \param options The server options object.
-/// \param model_repository_path The full path to the model repository.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetModelRepositoryPath(
-    TRITONSERVER_ServerOptions* options, const char* model_repository_path);
-
-/// Set the model control mode in a server options. For each mode the models
-/// will be managed as follows:
-///
-/// TRITONSERVER_MODEL_CONTROL_NONE: the models in the model repository will be
-/// loaded on startup. After startup any changes to the model repository will
-/// be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in
-/// an error.
-///
-/// TRITONSERVER_MODEL_CONTROL_POLL: the models in the model repository will be
-/// loaded on startup. The model repository can be polled periodically using
-/// TRITONSERVER_ServerPollModelRepository and the server will load, unload,
-/// and update models according to changes in the model repository.
-///
-/// TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in the model repository will
-/// not be loaded on startup. The corresponding model control APIs must be
-/// called to load / unload a model in the model repository.
-///
-/// \param options The server options object.
-/// \param mode The mode to use for the model control.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetModelControlMode(
-    TRITONSERVER_ServerOptions* options, TRITONSERVER_ModelControlMode mode);
-
-/// Set the model to be loaded at startup in a server options. The model must be
-/// present in one, and only one, of the specified model repositories.
-/// This function can be called multiple times with different model names
-/// to set multiple startup models.
-/// Note that it only takes effect in TRITONSERVER_MODEL_CONTROL_EXPLICIT mode.
-///
-/// \param options The server options object.
-/// \param model_name The name of the model to load on startup.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetStartupModel(
-    TRITONSERVER_ServerOptions* options, const char* model_name);
-
-/// Enable or disable strict model configuration handling in a server
-/// options.
-///
-/// \param options The server options object.
-/// \param strict True to enable strict model configuration handling,
-/// false to disable.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetStrictModelConfig(
-    TRITONSERVER_ServerOptions* options, bool strict);
-
-/// Set the rate limit mode in a server options.
-///
-/// TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the
-/// inference execution using the number of times each instance has got a
-/// chance to run. The execution gets to run only when its resource
-/// constraints are satisfied.
-///
-/// TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the
-/// inference gets executed whenever an instance is available.
-///
-/// \param options The server options object.
-/// \param mode The mode to use for the rate limiting. By default, execution
-/// count is used to determine the priorities.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetRateLimiterMode(
-    TRITONSERVER_ServerOptions* options, TRITONSERVER_RateLimitMode mode);
-
-/// Add resource count for rate limiting.
-///
-/// \param options The server options object.
-/// \param resource_name The name of the resource.
-/// \param resource_count The count of the resource.
-/// \param device The device identifier for the resource. A value of -1
-/// indicates that the specified number of resources are available on every
-/// device. The device value is ignored for a global resource. The server
-/// will use the rate limiter configuration specified for instance groups
-/// in the model config to determine whether a resource is global. In case of
-/// conflicting resource types in different model configurations, the server
-/// will raise an appropriate error while loading the model.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsAddRateLimiterResource(
-    TRITONSERVER_ServerOptions* options, const char* resource_name,
-    const size_t resource_count, const int device);
-
-/// Set the total pinned memory byte size that the server can allocate
-/// in a server options. The pinned memory pool will be shared across
-/// Triton itself and the backends that use
-/// TRITONBACKEND_MemoryManager to allocate memory.
-///
-/// \param options The server options object.
-/// \param size The pinned memory pool byte size.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(
-    TRITONSERVER_ServerOptions* options, uint64_t size);
-
-/// Set the total CUDA memory byte size that the server can allocate
-/// on a given GPU device in a server options. The CUDA memory pool
-/// will be shared across Triton itself and the backends that use
-/// TRITONBACKEND_MemoryManager to allocate memory.
-///
-/// \param options The server options object.
-/// \param gpu_device The GPU device to allocate the memory pool.
-/// \param size The CUDA memory pool byte size.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(
-    TRITONSERVER_ServerOptions* options, int gpu_device, uint64_t size);
-
-/// Set the total response cache byte size that the server can allocate in CPU
-/// memory. The response cache will be shared across all inference requests and
-/// across all models.
-///
-/// \param options The server options object.
-/// \param size The total response cache byte size.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetResponseCacheByteSize(
-    TRITONSERVER_ServerOptions* options, uint64_t size);
-
-/// Set the minimum supported CUDA compute capability in a server
-/// options.
-///
-/// \param options The server options object.
-/// \param cc The minimum CUDA compute capability.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(
-    TRITONSERVER_ServerOptions* options, double cc);
-
-/// Enable or disable exit-on-error in a server options.
-///
-/// \param options The server options object.
-/// \param exit True to enable exiting on initialization error, false
-/// to continue.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetExitOnError( - TRITONSERVER_ServerOptions* options, bool exit); - -/// Enable or disable strict readiness handling in a server options. -/// -/// \param options The server options object. -/// \param strict True to enable strict readiness handling, false to -/// disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetStrictReadiness( - TRITONSERVER_ServerOptions* options, bool strict); - -/// Set the exit timeout, in seconds, for the server in a server -/// options. -/// -/// \param options The server options object. -/// \param timeout The exit timeout, in seconds. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetExitTimeout( - TRITONSERVER_ServerOptions* options, unsigned int timeout); - -/// Set the number of threads used in buffer manager in a server options. -/// -/// \param options The server options object. -/// \param thread_count The number of threads. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( - TRITONSERVER_ServerOptions* options, unsigned int thread_count); - -/// Set the number of threads to concurrently load models in a server options. -/// -/// \param options The server options object. -/// \param thread_count The number of threads. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetModelLoadThreadCount( - TRITONSERVER_ServerOptions* options, unsigned int thread_count); - -/// Provide a log output file. -/// -/// \param options The server options object. -/// \param file a string defining the file where the log outputs will be saved. -/// An empty string for the file name will cause triton to direct logging -/// facilities to the console -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFile( - TRITONSERVER_ServerOptions* options, const char* file); - -/// Enable or disable info level logging. -/// -/// \param options The server options object. -/// \param log True to enable info logging, false to disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogInfo( - TRITONSERVER_ServerOptions* options, bool log); - -/// Enable or disable warning level logging. -/// -/// \param options The server options object. -/// \param log True to enable warning logging, false to disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogWarn( - TRITONSERVER_ServerOptions* options, bool log); - -/// Enable or disable error level logging. -/// -/// \param options The server options object. -/// \param log True to enable error logging, false to disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogError( - TRITONSERVER_ServerOptions* options, bool log); - -/// Set the logging format. -/// -/// \param options The server options object. -/// \param format The logging format. -/// \return a TRITONSERVER_Error indicating success or failure. 
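The option setters documented in this header are normally chained on a single options object before a server is created. The following hedged sketch is editorial and not part of the diff; the repository path "/models", the server id, the pool size, and the logging settings are all placeholder values, and `CHECK_TRITON` is the same local abort-on-error helper used in the earlier sketch.

```cpp
#include <cstdint>
#include <cstdlib>

#include "triton/core/tritonserver.h"

#define CHECK_TRITON(X) \
  do {                  \
    if ((X) != nullptr) std::abort(); \
  } while (0)

// Build a server options object; the caller owns the returned pointer.
TRITONSERVER_ServerOptions* MakeOptions()
{
  TRITONSERVER_ServerOptions* options = nullptr;
  CHECK_TRITON(TRITONSERVER_ServerOptionsNew(&options));

  CHECK_TRITON(TRITONSERVER_ServerOptionsSetServerId(options, "example-server"));
  // "/models" is a placeholder repository path.
  CHECK_TRITON(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models"));
  // Load models via the explicit model control APIs rather than at startup.
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetModelControlMode(
      options, TRITONSERVER_MODEL_CONTROL_EXPLICIT));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetStrictModelConfig(options, false));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetExitOnError(options, true));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetStrictReadiness(options, true));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetExitTimeout(options, 30));
  // 256 MiB pinned memory pool shared by Triton and its backends.
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(
      options, 256ULL << 20));
  // Empty file name directs logging to the console; verbose level 1.
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetLogFile(options, ""));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetLogInfo(options, true));
  CHECK_TRITON(TRITONSERVER_ServerOptionsSetLogVerbose(options, 1));
  return options;
}
```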
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogFormat( - TRITONSERVER_ServerOptions* options, const TRITONSERVER_LogFormat format); - -/// Set verbose logging level. Level zero disables verbose logging. -/// -/// \param options The server options object. -/// \param level The verbose logging level. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogVerbose( - TRITONSERVER_ServerOptions* options, int level); - -/// Enable or disable metrics collection in a server options. -/// -/// \param options The server options object. -/// \param metrics True to enable metrics, false to disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetrics( - TRITONSERVER_ServerOptions* options, bool metrics); - -/// Enable or disable GPU metrics collection in a server options. GPU -/// metrics are collected if both this option and -/// TRITONSERVER_ServerOptionsSetMetrics are true. -/// -/// \param options The server options object. -/// \param gpu_metrics True to enable GPU metrics, false to disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetGpuMetrics( - TRITONSERVER_ServerOptions* options, bool gpu_metrics); - -/// Enable or disable CPU metrics collection in a server options. CPU -/// metrics are collected if both this option and -/// TRITONSERVER_ServerOptionsSetMetrics are true. -/// -/// \param options The server options object. -/// \param cpu_metrics True to enable CPU metrics, false to disable. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetCpuMetrics( - TRITONSERVER_ServerOptions* options, bool cpu_metrics); - -/// Set the interval for metrics collection in a server options. -/// This is 2000 milliseconds by default. -/// -/// \param options The server options object. -/// \param metrics_interval_ms The time interval in ms between -/// successive metrics updates. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetMetricsInterval( - TRITONSERVER_ServerOptions* options, uint64_t metrics_interval_ms); - -/// Set the directory containing backend shared libraries. This -/// directory is searched last after the version and model directory -/// in the model repository when looking for the backend shared -/// library for a model. If the backend is named 'be' the directory -/// searched is 'backend_dir'/be/libtriton_be.so. -/// -/// \param options The server options object. -/// \param backend_dir The full path of the backend directory. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetBackendDirectory( - TRITONSERVER_ServerOptions* options, const char* backend_dir); - -/// Set the directory containing repository agent shared libraries. This -/// directory is searched when looking for the repository agent shared -/// library for a model. If the backend is named 'ra' the directory -/// searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so. -/// -/// \param options The server options object. -/// \param repoagent_dir The full path of the repository agent directory. -/// \return a TRITONSERVER_Error indicating success or failure. 
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - TRITONSERVER_ServerOptions* options, const char* repoagent_dir); - -/// Specify the limit on memory usage as a fraction on the device identified by -/// 'kind' and 'device_id'. If model loading on the device is requested and the -/// current memory usage exceeds the limit, the load will be rejected. If not -/// specified, the limit will not be set. -/// -/// Currently support TRITONSERVER_INSTANCEGROUPKIND_GPU -/// -/// \param options The server options object. -/// \param kind The kind of the device. -/// \param device_id The id of the device. -/// \param fraction The limit on memory usage as a fraction -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit( - TRITONSERVER_ServerOptions* options, - const TRITONSERVER_InstanceGroupKind kind, const int device_id, - const double fraction); - -/// Set a configuration setting for a named backend in a server -/// options. -/// -/// \param options The server options object. -/// \param backend_name The name of the backend. -/// \param setting The name of the setting. -/// \param value The setting value. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetBackendConfig( - TRITONSERVER_ServerOptions* options, const char* backend_name, - const char* setting, const char* value); - -/// Set a host policy setting for a given policy name in a server options. -/// -/// \param options The server options object. -/// \param policy_name The name of the policy. -/// \param setting The name of the setting. -/// \param value The setting value. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetHostPolicy( - TRITONSERVER_ServerOptions* options, const char* policy_name, - const char* setting, const char* value); - -/// TRITONSERVER_Server -/// -/// An inference server. -/// - -/// Model batch flags. The enum values must be power-of-2 values. -typedef enum tritonserver_batchflag_enum { - TRITONSERVER_BATCH_UNKNOWN = 1, - TRITONSERVER_BATCH_FIRST_DIM = 2 -} TRITONSERVER_ModelBatchFlag; - -/// Model index flags. The enum values must be power-of-2 values. -typedef enum tritonserver_modelindexflag_enum { - TRITONSERVER_INDEX_FLAG_READY = 1 -} TRITONSERVER_ModelIndexFlag; - -/// Model transaction policy flags. The enum values must be -/// power-of-2 values. -typedef enum tritonserver_txn_property_flag_enum { - TRITONSERVER_TXN_ONE_TO_ONE = 1, - TRITONSERVER_TXN_DECOUPLED = 2 -} TRITONSERVER_ModelTxnPropertyFlag; - -/// Create a new server object. The caller takes ownership of the -/// TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete -/// to release the object. -/// -/// \param server Returns the new inference server object. -/// \param options The inference server options object. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerNew( - TRITONSERVER_Server** server, TRITONSERVER_ServerOptions* options); - -/// Delete a server object. If server is not already stopped it is -/// stopped before being deleted. -/// -/// \param server The inference server object. -/// \return a TRITONSERVER_Error indicating success or failure. 
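The server lifecycle functions documented around this point compose in the obvious order: create the server from options, gate on liveness and readiness, then stop and delete. The sketch below is illustrative only; `MakeOptions()` is the hypothetical helper from the previous sketch and "densenet" is a placeholder model name.

```cpp
#include <cstdlib>

#include "triton/core/tritonserver.h"

#define CHECK_TRITON(X) \
  do {                  \
    if ((X) != nullptr) std::abort(); \
  } while (0)

// Hypothetical helper from the earlier options sketch.
TRITONSERVER_ServerOptions* MakeOptions();

void RunServer()
{
  TRITONSERVER_ServerOptions* options = MakeOptions();

  TRITONSERVER_Server* server = nullptr;
  CHECK_TRITON(TRITONSERVER_ServerNew(&server, options));
  // The options object can be released once the server has been created.
  CHECK_TRITON(TRITONSERVER_ServerOptionsDelete(options));

  bool live = false;
  bool ready = false;
  CHECK_TRITON(TRITONSERVER_ServerIsLive(server, &live));
  CHECK_TRITON(TRITONSERVER_ServerIsReady(server, &ready));

  // "densenet" is a placeholder; -1 lets the server choose the version.
  bool model_ready = false;
  CHECK_TRITON(
      TRITONSERVER_ServerModelIsReady(server, "densenet", -1, &model_ready));

  // ServerDelete stops the server if needed, but stopping explicitly
  // makes the shutdown point obvious.
  CHECK_TRITON(TRITONSERVER_ServerStop(server));
  CHECK_TRITON(TRITONSERVER_ServerDelete(server));
}
```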
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerDelete(
-    TRITONSERVER_Server* server);
-
-/// Stop a server object. A server can't be restarted once it is
-/// stopped.
-///
-/// \param server The inference server object.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerStop(
-    TRITONSERVER_Server* server);
-
-/// Register a new model repository. Not available in polling mode.
-///
-/// \param server The inference server object.
-/// \param repository_path The full path to the model repository.
-/// \param name_mapping List of name_mapping parameters. Each mapping has
-/// the model directory name as its key and the overridden model name as its
-/// value.
-/// \param mapping_count The number of mappings provided.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerRegisterModelRepository(
-    TRITONSERVER_Server* server, const char* repository_path,
-    const TRITONSERVER_Parameter** name_mapping, const uint32_t mapping_count);
-
-/// Unregister a model repository. Not available in polling mode.
-///
-/// \param server The inference server object.
-/// \param repository_path The full path to the model repository.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerUnregisterModelRepository(
-    TRITONSERVER_Server* server, const char* repository_path);
-
-/// Check the model repository for changes and update server state
-/// based on those changes.
-///
-/// \param server The inference server object.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server* server);
-
-/// Is the server live?
-///
-/// \param server The inference server object.
-/// \param live Returns true if server is live, false otherwise.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerIsLive(
-    TRITONSERVER_Server* server, bool* live);
-
-/// Is the server ready?
-///
-/// \param server The inference server object.
-/// \param ready Returns true if server is ready, false otherwise.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerIsReady(
-    TRITONSERVER_Server* server, bool* ready);
-
-/// Is the model ready?
-///
-/// \param server The inference server object.
-/// \param model_name The name of the model to get readiness for.
-/// \param model_version The version of the model to get readiness
-/// for. If -1 then the server will choose a version based on the
-/// model's policy.
-/// \param ready Returns true if the model is ready, false otherwise.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelIsReady(
-    TRITONSERVER_Server* server, const char* model_name,
-    const int64_t model_version, bool* ready);
-
-/// Get the batch properties of the model. The properties are
-/// communicated by a flags value and an (optional) object returned by
-/// 'voidp'.
-///
-/// - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the
-/// batching properties of the model. This means that the model
-/// does not support batching in any way that is usable by
-/// Triton. The returned 'voidp' value is nullptr.
-/// -/// - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching -/// along the first dimension of every input and output -/// tensor. Triton schedulers that perform batching can -/// automatically batch inference requests along this dimension. -/// The returned 'voidp' value is nullptr. -/// -/// \param server The inference server object. -/// \param model_name The name of the model. -/// \param model_version The version of the model. If -1 then the -/// server will choose a version based on the model's policy. -/// \param flags Returns flags indicating the batch properties of the -/// model. -/// \param voidp If non-nullptr, returns a point specific to the -/// 'flags' value. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, uint32_t* flags, void** voidp); - -/// Get the transaction policy of the model. The policy is -/// communicated by a flags value. -/// -/// - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly -/// one response per request. -/// -/// - TRITONSERVER_TXN_DECOUPLED: The model may generate zero -/// to many responses per request. -/// -/// \param server The inference server object. -/// \param model_name The name of the model. -/// \param model_version The version of the model. If -1 then the -/// server will choose a version based on the model's policy. -/// \param txn_flags Returns flags indicating the transaction policy of the -/// model. -/// \param voidp If non-nullptr, returns a point specific to the 'flags' value. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, uint32_t* txn_flags, void** voidp); - -/// Get the metadata of the server as a TRITONSERVER_Message object. -/// The caller takes ownership of the message object and must call -/// TRITONSERVER_MessageDelete to release the object. -/// -/// \param server The inference server object. -/// \param server_metadata Returns the server metadata message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerMetadata( - TRITONSERVER_Server* server, TRITONSERVER_Message** server_metadata); - -/// Get the metadata of a model as a TRITONSERVER_Message -/// object. The caller takes ownership of the message object and must -/// call TRITONSERVER_MessageDelete to release the object. -/// -/// \param server The inference server object. -/// \param model_name The name of the model. -/// \param model_version The version of the model. -/// If -1 then the server will choose a version based on the model's -/// policy. -/// \param model_metadata Returns the model metadata message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelMetadata( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, TRITONSERVER_Message** model_metadata); - -/// Get the statistics of a model as a TRITONSERVER_Message -/// object. The caller takes ownership of the object and must call -/// TRITONSERVER_MessageDelete to release the object. -/// -/// \param server The inference server object. -/// \param model_name The name of the model. 
-/// If empty, then statistics for all available models will be returned, -/// and the server will choose a version based on those models' policies. -/// \param model_version The version of the model. If -1 then the -/// server will choose a version based on the model's policy. -/// \param model_stats Returns the model statistics message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelStatistics( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, TRITONSERVER_Message** model_stats); - -/// Get the configuration of a model as a TRITONSERVER_Message object. -/// The caller takes ownership of the message object and must call -/// TRITONSERVER_MessageDelete to release the object. -/// -/// \param server The inference server object. -/// \param model_name The name of the model. -/// \param model_version The version of the model. If -1 then the -/// server will choose a version based on the model's policy. -/// \param config_version The model configuration will be returned in -/// a format matching this version. If the configuration cannot be -/// represented in the requested version's format then an error will -/// be returned. Currently only version 1 is supported. -/// \param model_config Returns the model config message. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelConfig( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, const uint32_t config_version, - TRITONSERVER_Message** model_config); - -/// Get the index of all unique models in the model repositories as a -/// TRITONSERVER_Message object. The caller takes ownership of the -/// message object and must call TRITONSERVER_MessageDelete to release -/// the object. -/// -/// If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models -/// that are loaded into the server and ready for inferencing are -/// returned. -/// -/// \param server The inference server object. -/// \param flags TRITONSERVER_ModelIndexFlag flags that control how to -/// collect the index. -/// \param model_index Return the model index message that holds the -/// index of all models contained in the server's model repository(s). -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelIndex( - TRITONSERVER_Server* server, uint32_t flags, - TRITONSERVER_Message** model_index); - -/// Load the requested model or reload the model if it is already -/// loaded. The function does not return until the model is loaded or -/// fails to load. Returned error indicates if model loaded -/// successfully or not. -/// -/// \param server The inference server object. -/// \param model_name The name of the model. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerLoadModel( - TRITONSERVER_Server* server, const char* model_name); - -/// Load the requested model or reload the model if it is already -/// loaded, with load parameters provided. The function does not return until -/// the model is loaded or fails to load. Returned error indicates if model -/// loaded successfully or not. -/// Currently the below parameter names are recognized: -/// - "config" : string parameter that contains a JSON representation of the -/// model configuration. 
This config will be used for loading the model instead
-/// of the one in the model directory.
-///
-/// \param server The inference server object.
-/// \param model_name The name of the model.
-/// \param parameters The array of load parameters.
-/// \param parameter_count The number of parameters.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerLoadModelWithParameters(
-    TRITONSERVER_Server* server, const char* model_name,
-    const TRITONSERVER_Parameter** parameters, const uint64_t parameter_count);
-
-/// Unload the requested model. Unloading a model that is not loaded
-/// on the server has no effect and a success code will be returned.
-/// The function does not wait for the requested model to be fully unloaded
-/// and a success code will be returned.
-/// Returned error indicates if the model unloaded successfully or not.
-///
-/// \param server The inference server object.
-/// \param model_name The name of the model.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerUnloadModel(
-    TRITONSERVER_Server* server, const char* model_name);
-
-/// Unload the requested model, and also unload any dependent model that
-/// was loaded along with the requested model (for example, the models composing
-/// an ensemble). Unloading a model that is not loaded
-/// on the server has no effect and a success code will be returned.
-/// The function does not wait for the requested model and all dependent
-/// models to be fully unloaded and a success code will be returned.
-/// Returned error indicates if the models unloaded successfully or not.
-///
-/// \param server The inference server object.
-/// \param model_name The name of the model.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerUnloadModelAndDependents(
-    TRITONSERVER_Server* server, const char* model_name);
-
-/// Get the current metrics for the server. The caller takes ownership
-/// of the metrics object and must call TRITONSERVER_MetricsDelete to
-/// release the object.
-///
-/// \param server The inference server object.
-/// \param metrics Returns the metrics.
-/// \return a TRITONSERVER_Error indicating success or failure.
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerMetrics(
-    TRITONSERVER_Server* server, TRITONSERVER_Metrics** metrics);
-
-/// Perform inference using the meta-data and inputs supplied by the
-/// 'inference_request'. If the function returns success, then the
-/// caller releases ownership of 'inference_request' and must not
-/// access it in any way after this call, until ownership is returned
-/// via the 'request_release_fn' callback registered in the request
-/// object with TRITONSERVER_InferenceRequestSetReleaseCallback.
-///
-/// The function unconditionally takes ownership of 'trace' and so the
-/// caller must not access it in any way after this call (except in
-/// the trace activity callbacks) until ownership is returned via the
-/// trace's release_fn callback.
-///
-/// Responses produced for this request are returned using the
-/// allocator and callback registered with the request by
-/// TRITONSERVER_InferenceRequestSetResponseCallback.
-///
-/// \param server The inference server object.
-/// \param inference_request The request object.
-/// \param trace The trace object for this request, or nullptr if no
-/// tracing.
-/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerInferAsync( - TRITONSERVER_Server* server, - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_InferenceTrace* trace); - -/// TRITONSERVER_MetricKind -/// -/// Types of metrics recognized by TRITONSERVER. -/// -typedef enum TRITONSERVER_metrickind_enum { - TRITONSERVER_METRIC_KIND_COUNTER, - TRITONSERVER_METRIC_KIND_GAUGE -} TRITONSERVER_MetricKind; - -/// Create a new metric family object. The caller takes ownership of the -/// TRITONSERVER_MetricFamily object and must call -/// TRITONSERVER_MetricFamilyDelete to release the object. -/// -/// \param family Returns the new metric family object. -/// \param kind The type of metric family to create. -/// \param name The name of the metric family seen when calling the metrics -/// endpoint. -/// \param description The description of the metric family seen when -/// calling the metrics endpoint. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew( - TRITONSERVER_MetricFamily** family, const TRITONSERVER_MetricKind kind, - const char* name, const char* description); - -/// Delete a metric family object. -/// A TRITONSERVER_MetricFamily* object should be deleted AFTER its -/// corresponding TRITONSERVER_Metric* objects have been deleted. -/// Attempting to delete a family before its metrics will return an error. -/// -/// \param family The metric family object to delete. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricFamilyDelete( - TRITONSERVER_MetricFamily* family); - -/// Create a new metric object. The caller takes ownership of the -/// TRITONSERVER_Metric object and must call -/// TRITONSERVER_MetricDelete to release the object. The caller is also -/// responsible for ownership of the labels passed in. Each label can be deleted -/// immediately after creating the metric with TRITONSERVER_ParameterDelete -/// if not re-using the labels. -/// -/// \param metric Returns the new metric object. -/// \param family The metric family to add this new metric to. -/// \param labels The array of labels to associate with this new metric. -/// \param label_count The number of labels. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricNew( - TRITONSERVER_Metric** metric, TRITONSERVER_MetricFamily* family, - const TRITONSERVER_Parameter** labels, const uint64_t label_count); - -/// Delete a metric object. -/// All TRITONSERVER_Metric* objects should be deleted BEFORE their -/// corresponding TRITONSERVER_MetricFamily* objects have been deleted. -/// If a family is deleted before its metrics, an error will be returned. -/// -/// \param metric The metric object to delete. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricDelete( - TRITONSERVER_Metric* metric); - -/// Get the current value of a metric object. -/// Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER -/// and TRITONSERVER_METRIC_KIND_GAUGE, and returns -/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind. -/// -/// \param metric The metric object to query. -/// \param value Returns the current value of the metric object. -/// \return a TRITONSERVER_Error indicating success or failure. 
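The metric-family and metric objects above follow the usual create/update/delete pattern, with families outliving their metrics. An illustrative sketch follows (editorial, not part of the deleted header); the family name is a placeholder and the metric carries no labels.

```cpp
#include <cstdlib>

#include "triton/core/tritonserver.h"

#define CHECK_TRITON(X) \
  do {                  \
    if ((X) != nullptr) std::abort(); \
  } while (0)

void CustomCounterExample()
{
  // Family name and description are placeholders shown on the metrics endpoint.
  TRITONSERVER_MetricFamily* family = nullptr;
  CHECK_TRITON(TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "example_requests_total",
      "Example counter exposed on the metrics endpoint"));

  // A metric with no labels; pass a label array and count to create
  // distinct series within the same family.
  TRITONSERVER_Metric* metric = nullptr;
  CHECK_TRITON(TRITONSERVER_MetricNew(&metric, family, nullptr, 0));

  // Counters only accept non-negative increments.
  CHECK_TRITON(TRITONSERVER_MetricIncrement(metric, 1.0));

  double value = 0.0;
  CHECK_TRITON(TRITONSERVER_MetricValue(metric, &value));

  // Metrics must be deleted before their family.
  CHECK_TRITON(TRITONSERVER_MetricDelete(metric));
  CHECK_TRITON(TRITONSERVER_MetricFamilyDelete(family));
}
```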
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricValue( - TRITONSERVER_Metric* metric, double* value); - -/// Increment the current value of metric by value. -/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE for any value, -/// and TRITONSERVER_METRIC_KIND_COUNTER for non-negative values. Returns -/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind -/// and TRITONSERVER_ERROR_INVALID_ARG for negative values on a -/// TRITONSERVER_METRIC_KIND_COUNTER metric. -/// -/// \param metric The metric object to update. -/// \param value The amount to increment the metric's value by. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricIncrement( - TRITONSERVER_Metric* metric, double value); - -/// Set the current value of metric to value. -/// Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and returns -/// TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind. -/// -/// \param metric The metric object to update. -/// \param value The amount to set metric's value to. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricSet( - TRITONSERVER_Metric* metric, double value); - -/// Get the TRITONSERVER_MetricKind of metric and its corresponding family. -/// -/// \param metric The metric object to query. -/// \param kind Returns the TRITONSERVER_MetricKind of metric. -/// \return a TRITONSERVER_Error indicating success or failure. -TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_GetMetricKind( - TRITONSERVER_Metric* metric, TRITONSERVER_MetricKind* kind); - -#ifdef __cplusplus -} -#endif diff --git a/3rdparty/core-r22.12/src/backend_config.cc b/3rdparty/core-r22.12/src/backend_config.cc deleted file mode 100644 index 367475fb0fff2925d6fdedeb924bef8a3ded779f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_config.cc +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "backend_config.h" - -#include "status.h" -#include "triton/common/logging.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -namespace { - -Status -GetTFSpecializedBackendName( - const triton::common::BackendCmdlineConfigMap& config_map, - std::string* specialized_name) -{ - std::string tf_version_str = "2"; - const auto& itr = config_map.find("tensorflow"); - if (itr != config_map.end()) { - if (BackendConfiguration(itr->second, "version", &tf_version_str).IsOk()) { - if ((tf_version_str != "1") && (tf_version_str != "2")) { - return Status( - Status::Code::INVALID_ARG, - "unexpected TensorFlow library version '" + tf_version_str + - "', expects 1 or 2."); - } - } - } - - *specialized_name += tf_version_str; - - return Status::Success; -} -} // namespace - -Status -BackendConfiguration( - const triton::common::BackendCmdlineConfig& config, const std::string& key, - std::string* val) -{ - for (const auto& pr : config) { - if (pr.first == key) { - *val = pr.second; - return Status::Success; - } - } - - return Status( - Status::Code::INTERNAL, - std::string("unable to find common backend configuration for '") + key + - "'"); -} - -Status -BackendConfigurationParseStringToDouble(const std::string& str, double* val) -{ - try { - *val = std::stod(str); - } - catch (...) { - return Status( - Status::Code::INTERNAL, - "unable to parse common backend configuration as double"); - } - - return Status::Success; -} - -Status -BackendConfigurationParseStringToBool(const std::string& str, bool* val) -{ - try { - std::string lowercase_str{str}; - std::transform( - lowercase_str.begin(), lowercase_str.end(), lowercase_str.begin(), - [](unsigned char c) { return std::tolower(c); }); - *val = (lowercase_str == "true"); - } - catch (...) 
{ - return Status( - Status::Code::INTERNAL, - "unable to parse common backend configuration as bool"); - } - - return Status::Success; -} - -Status -BackendConfigurationGlobalBackendsDirectory( - const triton::common::BackendCmdlineConfigMap& config_map, std::string* dir) -{ - const auto& itr = config_map.find(std::string()); - if (itr == config_map.end()) { - return Status( - Status::Code::INTERNAL, - "unable to find global backends directory configuration"); - } - - RETURN_IF_ERROR(BackendConfiguration(itr->second, "backend-directory", dir)); - - return Status::Success; -} - -Status -BackendConfigurationMinComputeCapability( - const triton::common::BackendCmdlineConfigMap& config_map, double* mcc) -{ -#ifdef TRITON_ENABLE_GPU - *mcc = TRITON_MIN_COMPUTE_CAPABILITY; -#else - *mcc = 0; -#endif // TRITON_ENABLE_GPU - - const auto& itr = config_map.find(std::string()); - if (itr == config_map.end()) { - return Status( - Status::Code::INTERNAL, "unable to find common backend configuration"); - } - - std::string min_compute_capability_str; - RETURN_IF_ERROR(BackendConfiguration( - itr->second, "min-compute-capability", &min_compute_capability_str)); - RETURN_IF_ERROR( - BackendConfigurationParseStringToDouble(min_compute_capability_str, mcc)); - - return Status::Success; -} - -Status -BackendConfigurationAutoCompleteConfig( - const triton::common::BackendCmdlineConfigMap& config_map, bool* acc) -{ - const auto& itr = config_map.find(std::string()); - if (itr == config_map.end()) { - return Status( - Status::Code::INTERNAL, "unable to find auto-complete configuration"); - } - - std::string auto_complete_config_str; - RETURN_IF_ERROR(BackendConfiguration( - itr->second, "auto-complete-config", &auto_complete_config_str)); - RETURN_IF_ERROR( - BackendConfigurationParseStringToBool(auto_complete_config_str, acc)); - - return Status::Success; -} - -Status -BackendConfigurationSpecializeBackendName( - const triton::common::BackendCmdlineConfigMap& config_map, - const std::string& backend_name, std::string* specialized_name) -{ - *specialized_name = backend_name; - if (backend_name == "tensorflow") { - RETURN_IF_ERROR(GetTFSpecializedBackendName(config_map, specialized_name)); - } - - return Status::Success; -} - -Status -BackendConfigurationBackendLibraryName( - const std::string& backend_name, std::string* libname) -{ -#ifdef _WIN32 - *libname = "triton_" + backend_name + ".dll"; -#else - *libname = "libtriton_" + backend_name + ".so"; -#endif - - return Status::Success; -} - -Status -BackendConfigurationModelLoadGpuFraction( - const triton::common::BackendCmdlineConfigMap& config_map, - const int device_id, double* memory_limit) -{ - *memory_limit = 1.0; - const auto& itr = config_map.find(std::string()); - if (itr == config_map.end()) { - return Status( - Status::Code::INTERNAL, - "unable to find global backends directory configuration"); - } - - static std::string key_prefix = "model-load-gpu-limit-device-"; - std::string memory_limit_str; - auto status = BackendConfiguration( - itr->second, key_prefix + std::to_string(device_id), &memory_limit_str); - // Allow missing key, default to 1.0 (no limit) if the limit is not specified - if (status.IsOk()) { - RETURN_IF_ERROR(BackendConfigurationParseStringToDouble( - memory_limit_str, memory_limit)); - } - - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_config.h b/3rdparty/core-r22.12/src/backend_config.h deleted file mode 100644 index 
acd2a0c214030eb0b3b0708c69cc98b44344d135..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_config.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -/// Get a key's string value from a backend configuration. -Status BackendConfiguration( - const triton::common::BackendCmdlineConfig& config, const std::string& key, - std::string* val); - -/// Convert a backend configuration string value into a double. -Status BackendConfigurationParseStringToDouble( - const std::string& str, double* val); - -/// Convert a backend configuration string value into a bool. -Status BackendConfigurationParseStringToBool(const std::string& str, bool* val); - -/// Get the global backends directory from the backend configuration. -Status BackendConfigurationGlobalBackendsDirectory( - const triton::common::BackendCmdlineConfigMap& config_map, - std::string* dir); - -/// Get the minimum compute capability from the backend configuration. -Status BackendConfigurationMinComputeCapability( - const triton::common::BackendCmdlineConfigMap& config_map, double* mcc); - -/// Get the model configuration auto-complete setting from the backend -/// configuration. -Status BackendConfigurationAutoCompleteConfig( - const triton::common::BackendCmdlineConfigMap& config_map, bool* acc); - -/// Convert a backend name to the specialized version of that name -/// based on the backend configuration. For example, "tensorflow" will -/// convert to either "tensorflow1" or "tensorflow2" depending on how -/// tritonserver is run. -Status BackendConfigurationSpecializeBackendName( - const triton::common::BackendCmdlineConfigMap& config_map, - const std::string& backend_name, std::string* specialized_name); - -/// Return the shared library name for a backend. 
-Status BackendConfigurationBackendLibraryName( - const std::string& backend_name, std::string* libname); - -/// Get GPU memory limit fraction for model loading -/// from the backend configuration. -Status BackendConfigurationModelLoadGpuFraction( - const triton::common::BackendCmdlineConfigMap& config_map, - const int device_id, double* memory_limit); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_manager.cc b/3rdparty/core-r22.12/src/backend_manager.cc deleted file mode 100644 index 265202b241703c7e58aabc0534c7d9ef5fdb2b1f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_manager.cc +++ /dev/null @@ -1,383 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "backend_manager.h" - -#include "backend_memory_manager.h" -#include "server_message.h" -#include "shared_library.h" -#include "triton/common/logging.h" - -// For unknown reason, windows will not export the TRITONBACKEND_* -// functions declared with dllexport in tritonbackend.h. To get those -// functions exported it is (also?) necessary to mark the definitions -// in this file with dllexport as well. -#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -namespace triton { namespace core { - -// -// TritonBackend -// -Status -TritonBackend::Create( - const std::string& name, const std::string& dir, const std::string& libpath, - const triton::common::BackendCmdlineConfig& backend_cmdline_config, - std::shared_ptr* backend) -{ - // Create the JSON representation of the backend configuration. 
-  triton::common::TritonJson::Value backend_config_json(
-      triton::common::TritonJson::ValueType::OBJECT);
-  if (!backend_cmdline_config.empty()) {
-    triton::common::TritonJson::Value cmdline_json(
-        backend_config_json, triton::common::TritonJson::ValueType::OBJECT);
-    for (const auto& pr : backend_cmdline_config) {
-      RETURN_IF_ERROR(cmdline_json.AddString(pr.first.c_str(), pr.second));
-    }
-
-    RETURN_IF_ERROR(
-        backend_config_json.Add("cmdline", std::move(cmdline_json)));
-  }
-
-  TritonServerMessage backend_config(backend_config_json);
-
-  auto local_backend = std::shared_ptr<TritonBackend>(
-      new TritonBackend(name, dir, libpath, backend_config));
-
-  // Load the library and initialize all the entrypoints
-  RETURN_IF_ERROR(local_backend->LoadBackendLibrary());
-
-  // Backend initialization is optional... The TRITONBACKEND_Backend
-  // object is this TritonBackend object. We must set the shared
-  // library path to point to the backend directory in case the
-  // backend library attempts to load additional shared libraries.
-  if (local_backend->backend_init_fn_ != nullptr) {
-    std::unique_ptr<SharedLibrary> slib;
-    RETURN_IF_ERROR(SharedLibrary::Acquire(&slib));
-    RETURN_IF_ERROR(slib->SetLibraryDirectory(local_backend->dir_));
-
-    TRITONSERVER_Error* err = local_backend->backend_init_fn_(
-        reinterpret_cast<TRITONBACKEND_Backend*>(local_backend.get()));
-
-    RETURN_IF_ERROR(slib->ResetLibraryDirectory());
-    RETURN_IF_TRITONSERVER_ERROR(err);
-  }
-
-  local_backend->UpdateAttributes();
-
-  *backend = std::move(local_backend);
-  return Status::Success;
-}
-
-Status
-TritonBackend::UpdateAttributes()
-{
-  if (backend_attri_fn_ == nullptr) {
-    return Status::Success;
-  }
-
-  // Create an Attribute object for the backend to fill, note that it copies
-  // some fields from 'attributes_' while the others use default value. This
-  // is an ad hoc way to determine whether the attribute is set by the backend
-  // and keep / update current value.
-  Attribute latest;
-  latest.exec_policy_ = attributes_.exec_policy_;
-  RETURN_IF_TRITONSERVER_ERROR(backend_attri_fn_(
-      reinterpret_cast<TRITONBACKEND_Backend*>(this),
-      reinterpret_cast<TRITONBACKEND_BackendAttribute*>(&latest)));
-
-  // Update attributes that were set
-  attributes_.exec_policy_ = latest.exec_policy_;
-  if (!latest.preferred_groups_.empty()) {
-    attributes_.preferred_groups_ = latest.preferred_groups_;
-  }
-  return Status::Success;
-}
-
-TritonBackend::TritonBackend(
-    const std::string& name, const std::string& dir, const std::string& libpath,
-    const TritonServerMessage& backend_config)
-    : name_(name), dir_(dir), libpath_(libpath),
-      backend_config_(backend_config), state_(nullptr)
-{
-  ClearHandles();
-}
-
-TritonBackend::~TritonBackend()
-{
-  LOG_VERBOSE(1) << "unloading backend '" << name_ << "'";
-
-  // Backend finalization is optional... The TRITONBACKEND_Backend
-  // object is this TritonBackend object.
- if (backend_fini_fn_ != nullptr) { - LOG_TRITONSERVER_ERROR( - backend_fini_fn_(reinterpret_cast(this)), - "failed finalizing backend"); - } - - ClearHandles(); -} - -void -TritonBackend::ClearHandles() -{ - dlhandle_ = nullptr; - backend_init_fn_ = nullptr; - backend_fini_fn_ = nullptr; - backend_attri_fn_ = nullptr; - model_init_fn_ = nullptr; - model_fini_fn_ = nullptr; - inst_init_fn_ = nullptr; - inst_fini_fn_ = nullptr; - inst_exec_fn_ = nullptr; -} - -Status -TritonBackend::LoadBackendLibrary() -{ - TritonBackendInitFn_t bifn; - TritonBackendFiniFn_t bffn; - TritonBackendAttriFn_t bafn; - TritonModelInitFn_t mifn; - TritonModelFiniFn_t mffn; - TritonModelInstanceInitFn_t iifn; - TritonModelInstanceFiniFn_t iffn; - TritonModelInstanceExecFn_t iefn; - - { - std::unique_ptr slib; - RETURN_IF_ERROR(SharedLibrary::Acquire(&slib)); - - RETURN_IF_ERROR(slib->OpenLibraryHandle(libpath_, &dlhandle_)); - - // Backend initialize and finalize functions, optional - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_Initialize", true /* optional */, - reinterpret_cast(&bifn))); - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_Finalize", true /* optional */, - reinterpret_cast(&bffn))); - // Backend attribute function, optional - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_GetBackendAttribute", true /* optional */, - reinterpret_cast(&bafn))); - - // Model initialize and finalize functions, optional - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_ModelInitialize", true /* optional */, - reinterpret_cast(&mifn))); - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_ModelFinalize", true /* optional */, - reinterpret_cast(&mffn))); - - // Model instance initialize and finalize functions, optional - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_ModelInstanceInitialize", true /* optional */, - reinterpret_cast(&iifn))); - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_ModelInstanceFinalize", true /* optional */, - reinterpret_cast(&iffn))); - - // Model instance execute function, required - RETURN_IF_ERROR(slib->GetEntrypoint( - dlhandle_, "TRITONBACKEND_ModelInstanceExecute", false /* optional */, - reinterpret_cast(&iefn))); - } - - backend_init_fn_ = bifn; - backend_fini_fn_ = bffn; - backend_attri_fn_ = bafn; - model_init_fn_ = mifn; - model_fini_fn_ = mffn; - inst_init_fn_ = iifn; - inst_fini_fn_ = iffn; - inst_exec_fn_ = iefn; - - return Status::Success; -} - -extern "C" { - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ApiVersion(uint32_t* major, uint32_t* minor) -{ - *major = TRITONBACKEND_API_VERSION_MAJOR; - *minor = TRITONBACKEND_API_VERSION_MINOR; - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendName(TRITONBACKEND_Backend* backend, const char** name) -{ - TritonBackend* tb = reinterpret_cast(backend); - *name = tb->Name().c_str(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendConfig( - TRITONBACKEND_Backend* backend, TRITONSERVER_Message** backend_config) -{ - TritonBackend* tb = reinterpret_cast(backend); - *backend_config = const_cast( - reinterpret_cast(&tb->BackendConfig())); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendExecutionPolicy( - TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy* policy) -{ - TritonBackend* tb = reinterpret_cast(backend); - *policy = tb->ExecutionPolicy(); - return 
nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendSetExecutionPolicy( - TRITONBACKEND_Backend* backend, TRITONBACKEND_ExecutionPolicy policy) -{ - TritonBackend* tb = reinterpret_cast(backend); - tb->SetExecutionPolicy(policy); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendArtifacts( - TRITONBACKEND_Backend* backend, TRITONBACKEND_ArtifactType* artifact_type, - const char** location) -{ - TritonBackend* tb = reinterpret_cast(backend); - *artifact_type = TRITONBACKEND_ARTIFACT_FILESYSTEM; - *location = tb->Directory().c_str(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendMemoryManager( - TRITONBACKEND_Backend* backend, TRITONBACKEND_MemoryManager** manager) -{ - static TritonMemoryManager gMemoryManager; - *manager = reinterpret_cast(&gMemoryManager); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendState(TRITONBACKEND_Backend* backend, void** state) -{ - TritonBackend* tb = reinterpret_cast(backend); - *state = tb->State(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendSetState(TRITONBACKEND_Backend* backend, void* state) -{ - TritonBackend* tb = reinterpret_cast(backend); - tb->SetState(state); - return nullptr; // success -} - -} // extern C - -// -// TritonBackendManager -// - -static std::weak_ptr backend_manager_; -static std::mutex mu_; - -Status -TritonBackendManager::Create(std::shared_ptr* manager) -{ - std::lock_guard lock(mu_); - - // If there is already a manager then we just use it... - *manager = backend_manager_.lock(); - if (*manager != nullptr) { - return Status::Success; - } - - manager->reset(new TritonBackendManager()); - backend_manager_ = *manager; - - return Status::Success; -} - -Status -TritonBackendManager::CreateBackend( - const std::string& name, const std::string& dir, const std::string& libpath, - const triton::common::BackendCmdlineConfig& backend_cmdline_config, - std::shared_ptr* backend) -{ - std::lock_guard lock(mu_); - - const auto& itr = backend_map_.find(libpath); - if (itr != backend_map_.end()) { - *backend = itr->second; - return Status::Success; - } - - RETURN_IF_ERROR(TritonBackend::Create( - name, dir, libpath, backend_cmdline_config, backend)); - backend_map_.insert({libpath, *backend}); - - return Status::Success; -} - -Status -TritonBackendManager::BackendState( - std::unique_ptr>>* - backend_state) -{ - std::lock_guard lock(mu_); - - std::unique_ptr>> - backend_state_map( - new std::unordered_map>); - for (const auto& backend_pair : backend_map_) { - auto& libpath = backend_pair.first; - auto backend = backend_pair.second; - - const char* backend_config; - size_t backend_config_size; - backend->BackendConfig().Serialize(&backend_config, &backend_config_size); - backend_state_map->insert( - {backend->Name(), std::vector{libpath, backend_config}}); - } - - *backend_state = std::move(backend_state_map); - - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_manager.h b/3rdparty/core-r22.12/src/backend_manager.h deleted file mode 100644 index 089268c4221ddc001f01c80f313c3b22f3338d03..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_manager.h +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include "constants.h" -#include "server_message.h" -#include "status.h" -#include "triton/common/model_config.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -// -// Proxy to a backend shared library. 
-// -class TritonBackend { - public: - struct Attribute { - Attribute() : exec_policy_(TRITONBACKEND_EXECUTION_BLOCKING) {} - TRITONBACKEND_ExecutionPolicy exec_policy_; - std::vector preferred_groups_; - }; - typedef TRITONSERVER_Error* (*TritonModelInitFn_t)( - TRITONBACKEND_Model* model); - typedef TRITONSERVER_Error* (*TritonModelFiniFn_t)( - TRITONBACKEND_Model* model); - typedef TRITONSERVER_Error* (*TritonModelInstanceInitFn_t)( - TRITONBACKEND_ModelInstance* instance); - typedef TRITONSERVER_Error* (*TritonModelInstanceFiniFn_t)( - TRITONBACKEND_ModelInstance* instance); - typedef TRITONSERVER_Error* (*TritonModelInstanceExecFn_t)( - TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, - const uint32_t request_cnt); - - static Status Create( - const std::string& name, const std::string& dir, - const std::string& libpath, - const triton::common::BackendCmdlineConfig& backend_cmdline_config, - std::shared_ptr* backend); - ~TritonBackend(); - - const std::string& Name() const { return name_; } - const std::string& Directory() const { return dir_; } - const TritonServerMessage& BackendConfig() const { return backend_config_; } - const Attribute& BackendAttributes() const { return attributes_; } - - TRITONBACKEND_ExecutionPolicy ExecutionPolicy() const - { - return attributes_.exec_policy_; - } - void SetExecutionPolicy(const TRITONBACKEND_ExecutionPolicy policy) - { - attributes_.exec_policy_ = policy; - } - - void* State() { return state_; } - void SetState(void* state) { state_ = state; } - - TritonModelInitFn_t ModelInitFn() const { return model_init_fn_; } - TritonModelFiniFn_t ModelFiniFn() const { return model_fini_fn_; } - TritonModelInstanceInitFn_t ModelInstanceInitFn() const - { - return inst_init_fn_; - } - TritonModelInstanceFiniFn_t ModelInstanceFiniFn() const - { - return inst_fini_fn_; - } - TritonModelInstanceExecFn_t ModelInstanceExecFn() const - { - return inst_exec_fn_; - } - - private: - typedef TRITONSERVER_Error* (*TritonBackendInitFn_t)( - TRITONBACKEND_Backend* backend); - typedef TRITONSERVER_Error* (*TritonBackendFiniFn_t)( - TRITONBACKEND_Backend* backend); - typedef TRITONSERVER_Error* (*TritonBackendAttriFn_t)( - TRITONBACKEND_Backend* backend, - TRITONBACKEND_BackendAttribute* backend_attributes); - - TritonBackend( - const std::string& name, const std::string& dir, - const std::string& libpath, const TritonServerMessage& backend_config); - - void ClearHandles(); - Status LoadBackendLibrary(); - - Status UpdateAttributes(); - - // The name of the backend. - const std::string name_; - - // Full path to the directory holding backend shared library and - // other artifacts. - const std::string dir_; - - // Full path to the backend shared library. - const std::string libpath_; - - // Backend configuration as JSON - TritonServerMessage backend_config_; - - // backend attributes - Attribute attributes_; - - // dlopen / dlsym handles - void* dlhandle_; - TritonBackendInitFn_t backend_init_fn_; - TritonBackendFiniFn_t backend_fini_fn_; - TritonBackendAttriFn_t backend_attri_fn_; - TritonModelInitFn_t model_init_fn_; - TritonModelFiniFn_t model_fini_fn_; - TritonModelInstanceInitFn_t inst_init_fn_; - TritonModelInstanceFiniFn_t inst_fini_fn_; - TritonModelInstanceExecFn_t inst_exec_fn_; - - // Opaque state associated with the backend. - void* state_; -}; - -// -// Manage communication with Triton backends and their lifecycle. 
-// -class TritonBackendManager { - public: - static Status Create(std::shared_ptr* manager); - - Status CreateBackend( - const std::string& name, const std::string& dir, - const std::string& libpath, - const triton::common::BackendCmdlineConfig& backend_cmdline_config, - std::shared_ptr* backend); - - Status BackendState( - std::unique_ptr< - std::unordered_map>>* - backend_state); - - private: - DISALLOW_COPY_AND_ASSIGN(TritonBackendManager); - TritonBackendManager() = default; - std::unordered_map> backend_map_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_memory_manager.cc b/3rdparty/core-r22.12/src/backend_memory_manager.cc deleted file mode 100644 index 0266d2169fe52969490b8d0cc42f4e99df4344d0..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_memory_manager.cc +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "backend_memory_manager.h" - -#include "pinned_memory_manager.h" -#include "status.h" -#include "tritonserver_apis.h" - -#ifdef TRITON_ENABLE_GPU -#include -#include "cuda_memory_manager.h" -#endif // TRITON_ENABLE_GPU - -// For unknown reason, windows will not export the TRITONBACKEND_* -// functions declared with dllexport in tritonbackend.h. To get those -// functions exported it is (also?) necessary to mark the definitions -// in this file with dllexport as well. 
-#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -namespace triton { namespace core { - -extern "C" { - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_MemoryManagerAllocate( - TRITONBACKEND_MemoryManager* manager, void** buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id, - const uint64_t byte_size) -{ - switch (memory_type) { - case TRITONSERVER_MEMORY_GPU: -#ifdef TRITON_ENABLE_GPU - { - auto status = CudaMemoryManager::Alloc(buffer, byte_size, memory_type_id); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.ErrorCode()), - status.Message().c_str()); - } - break; - } -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "GPU memory allocation not supported"); -#endif // TRITON_ENABLE_GPU - - case TRITONSERVER_MEMORY_CPU_PINNED: -#ifdef TRITON_ENABLE_GPU - { - TRITONSERVER_MemoryType mt = memory_type; - auto status = PinnedMemoryManager::Alloc(buffer, byte_size, &mt, false); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.ErrorCode()), - status.Message().c_str()); - } - break; - } -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "Pinned memory allocation not supported"); -#endif // TRITON_ENABLE_GPU - - case TRITONSERVER_MEMORY_CPU: { - *buffer = malloc(byte_size); - if (*buffer == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNAVAILABLE, "CPU memory allocation failed"); - } - break; - } - } - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_MemoryManagerFree( - TRITONBACKEND_MemoryManager* manager, void* buffer, - const TRITONSERVER_MemoryType memory_type, const int64_t memory_type_id) -{ - switch (memory_type) { - case TRITONSERVER_MEMORY_GPU: { -#ifdef TRITON_ENABLE_GPU - auto status = CudaMemoryManager::Free(buffer, memory_type_id); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), - status.Message().c_str()); - } -#endif // TRITON_ENABLE_GPU - break; - } - - case TRITONSERVER_MEMORY_CPU_PINNED: { -#ifdef TRITON_ENABLE_GPU - auto status = PinnedMemoryManager::Free(buffer); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), - status.Message().c_str()); - } -#endif // TRITON_ENABLE_GPU - break; - } - - case TRITONSERVER_MEMORY_CPU: - free(buffer); - break; - } - - return nullptr; // success -} - -} // extern C - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_memory_manager.h b/3rdparty/core-r22.12/src/backend_memory_manager.h deleted file mode 100644 index 5364e13bc944bae22c8dde4705323387709578a2..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_memory_manager.h +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -namespace triton { namespace core { - -// Currently there is just a global memory manager that is used for -// all backends and which simply forwards requests on to the core -// memory manager. -struct TritonMemoryManager { -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_model.cc b/3rdparty/core-r22.12/src/backend_model.cc deleted file mode 100644 index 2f838810589813c61969eade380fcaeff777a25a..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_model.cc +++ /dev/null @@ -1,1301 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
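// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the deleted Triton sources] The
// TRITONBACKEND_MemoryManager entry points removed above are what a backend
// built against this API would call for scratch allocations. This is a
// minimal, hedged illustration: the helper name AllocateCpuScratch and the
// include path are our assumptions; only the TRITONBACKEND_* and
// TRITONSERVER_* calls are taken from the API shown in this diff.

#include "triton/core/tritonbackend.h"  // assumed header path for the TRITONBACKEND_* API

static TRITONSERVER_Error*
AllocateCpuScratch(
    TRITONBACKEND_Backend* backend, const uint64_t byte_size, void** buffer)
{
  // Obtain the (global) memory manager proxy from the backend handle.
  TRITONBACKEND_MemoryManager* mm = nullptr;
  TRITONSERVER_Error* err = TRITONBACKEND_BackendMemoryManager(backend, &mm);
  if (err != nullptr) {
    return err;
  }

  // Plain CPU memory; GPU and pinned allocations use the other
  // TRITONSERVER_MemoryType values together with a matching memory_type_id.
  return TRITONBACKEND_MemoryManagerAllocate(
      mm, buffer, TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */, byte_size);
}

// A matching release would call TRITONBACKEND_MemoryManagerFree(mm, buffer,
// TRITONSERVER_MEMORY_CPU, 0) once the scratch space is no longer needed.
// ---------------------------------------------------------------------------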
- -#include "backend_model.h" - -#include -#include "backend_config.h" -#include "backend_model_instance.h" -#include "dynamic_batch_scheduler.h" -#include "filesystem.h" -#include "model_config_utils.h" -#include "numa_utils.h" -#include "sequence_batch_scheduler.h" -#include "sequence_state.h" -#include "server.h" -#include "server_message.h" -#include "shared_library.h" -#include "triton/common/logging.h" -#include "tritonserver_apis.h" - -// For unknown reason, windows will not export the TRITONBACKEND_* -// functions declared with dllexport in tritonbackend.h. To get those -// functions exported it is (also?) necessary to mark the definitions -// in this file with dllexport as well. -#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -namespace triton { namespace core { - -Status -TritonModel::Create( - InferenceServer* server, const std::string& model_path, - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map, - const std::string& model_name, const int64_t version, - inference::ModelConfig model_config, const bool is_config_provided, - std::unique_ptr* model) -{ - model->reset(); - - // The model configuration must specify a backend. The name of the - // corresponding shared library must be libtriton_.so. - if (model_config.backend().empty()) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'backend' for '" + model_config.name() + "'"); - } - - // Localize the content of the model repository corresponding to - // 'model_name'. This model holds a handle to the localized content - // so that it persists as long as the model is loaded. - std::shared_ptr localized_model_dir; - RETURN_IF_ERROR(LocalizePath(model_path, &localized_model_dir)); - - // Localize paths in backend model config - // [FIXME] Remove once a more permanent solution is implemented (DLIS-4211) - RETURN_IF_ERROR(LocalizePythonBackendExecutionEnvironmentPath( - model_path, &model_config, &localized_model_dir)); - - // Get some internal configuration values needed for initialization. - std::string backend_dir; - RETURN_IF_ERROR(BackendConfigurationGlobalBackendsDirectory( - backend_cmdline_config_map, &backend_dir)); - - bool auto_complete_config = false; - RETURN_IF_ERROR(BackendConfigurationAutoCompleteConfig( - backend_cmdline_config_map, &auto_complete_config)); - - double min_compute_capability = 0; - RETURN_IF_ERROR(BackendConfigurationMinComputeCapability( - backend_cmdline_config_map, &min_compute_capability)); - - std::string specialized_backend_name; - RETURN_IF_ERROR(BackendConfigurationSpecializeBackendName( - backend_cmdline_config_map, model_config.backend(), - &specialized_backend_name)); - - std::string backend_libname; - RETURN_IF_ERROR(BackendConfigurationBackendLibraryName( - specialized_backend_name, &backend_libname)); - - // Get the path to the backend shared library. Search path is - // version directory, model directory, global backend directory. 
- const auto localized_model_path = localized_model_dir->Path(); - const auto version_path = - JoinPath({localized_model_path, std::to_string(version)}); - const std::string global_path = - JoinPath({backend_dir, specialized_backend_name}); - const std::vector search_paths = { - version_path, localized_model_path, global_path}; - - std::string backend_libdir; - std::string backend_libpath; - for (const auto& path : search_paths) { - const auto full_path = JoinPath({path, backend_libname}); - bool exists = false; - RETURN_IF_ERROR(FileExists(full_path, &exists)); - if (exists) { - backend_libdir = path; - backend_libpath = full_path; - break; - } - } - - if (backend_libpath.empty()) { - return Status( - Status::Code::INVALID_ARG, "unable to find '" + backend_libname + - "' for model '" + model_config.name() + - "', searched: " + version_path + ", " + - model_path + ", " + global_path); - } - - // Resolve the global backend configuration with the specific backend - // configuration - triton::common::BackendCmdlineConfig config; - RETURN_IF_ERROR(ResolveBackendConfigs( - backend_cmdline_config_map, model_config.backend(), config)); - - RETURN_IF_ERROR(SetBackendConfigDefaults(config)); - - std::shared_ptr backend; - RETURN_IF_ERROR(server->BackendManager()->CreateBackend( - model_config.backend(), backend_libdir, backend_libpath, config, - &backend)); - - // Normalize backend-dependent config - { - const auto& attributes = backend->BackendAttributes(); - // [WIP] formalize config normalization / validation - RETURN_IF_ERROR(NormalizeInstanceGroup( - min_compute_capability, attributes.preferred_groups_, &model_config)); - RETURN_IF_ERROR( - ValidateInstanceGroup(model_config, min_compute_capability)); - } - - // Create and initialize the model. - std::unique_ptr local_model(new TritonModel( - server, localized_model_dir, backend, min_compute_capability, version, - model_config, auto_complete_config)); - - TritonModel* raw_local_model = local_model.get(); - - // Model initialization is optional... The TRITONBACKEND_Model - // object is this TritonModel object. We must set set shared library - // path to point to the backend directory in case the backend - // library attempts to load additional shared libaries. - if (backend->ModelInitFn() != nullptr) { - std::unique_ptr slib; - RETURN_IF_ERROR(SharedLibrary::Acquire(&slib)); - RETURN_IF_ERROR(slib->SetLibraryDirectory(backend->Directory())); - - TRITONSERVER_Error* err = backend->ModelInitFn()( - reinterpret_cast(raw_local_model)); - - RETURN_IF_ERROR(slib->ResetLibraryDirectory()); - RETURN_IF_TRITONSERVER_ERROR(err); - } - - // Initialize the model for Triton core usage - RETURN_IF_ERROR(local_model->Init(is_config_provided)); - - bool device_blocking = false; - if (local_model->backend_->ExecutionPolicy() == - TRITONBACKEND_EXECUTION_DEVICE_BLOCKING) { - if (model_config.has_sequence_batching()) { - LOG_INFO << "Overriding execution policy to " - "\"TRITONBACKEND_EXECUTION_BLOCKING\" for sequence model \"" - << model_config.name() << "\""; - } else { - device_blocking = true; - } - } - - // Create and initialize the model instances for this model. 
- RETURN_IF_ERROR(TritonModelInstance::CreateInstances( - raw_local_model, backend_cmdline_config_map, host_policy_map, - model_config, device_blocking)); - - RETURN_IF_ERROR(local_model->SetConfiguredScheduler()); - - *model = std::move(local_model); - return Status::Success; -} - -Status -TritonModel::ResolveBackendConfigs( - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const std::string& backend_name, - triton::common::BackendCmdlineConfig& config) -{ - const auto& global_itr = backend_cmdline_config_map.find(std::string()); - const auto& specific_itr = backend_cmdline_config_map.find(backend_name); - if (specific_itr == backend_cmdline_config_map.end() && - global_itr != backend_cmdline_config_map.end()) { - for (auto setting : global_itr->second) { - config.push_back(setting); - } - } else if ( - specific_itr != backend_cmdline_config_map.end() && - global_itr == backend_cmdline_config_map.end()) { - for (auto setting : specific_itr->second) { - config.push_back(setting); - } - } else if ( - specific_itr != backend_cmdline_config_map.end() && - global_itr != backend_cmdline_config_map.end()) { - triton::common::BackendCmdlineConfig global_backend_config = - global_itr->second; - triton::common::BackendCmdlineConfig specific_backend_config = - specific_itr->second; - - std::sort(global_backend_config.begin(), global_backend_config.end()); - std::sort(specific_backend_config.begin(), specific_backend_config.end()); - - size_t global_index = 0; - size_t specific_index = 0; - while (global_index < global_backend_config.size() && - specific_index < specific_backend_config.size()) { - auto& current_global_setting = global_backend_config.at(global_index); - auto& current_specific_setting = - specific_backend_config.at(specific_index); - if (current_specific_setting.first.compare( - current_global_setting.first) == 0) { - // specific setting overrides global setting - config.push_back(current_specific_setting); - ++global_index; - ++specific_index; - } else if ( - current_specific_setting.first.compare(current_global_setting.first) < - 0) { - config.push_back(current_specific_setting); - ++specific_index; - } else { - config.push_back(current_global_setting); - ++global_index; - } - } - - // add the rest of the global configs - if (global_index < global_backend_config.size()) { - auto& current_global_setting = global_backend_config.at(global_index); - config.push_back(current_global_setting); - } - - // add the rest of the specific settings - if (specific_index < specific_backend_config.size()) { - auto& current_specific_setting = - specific_backend_config.at(specific_index); - config.push_back(current_specific_setting); - } - } // else empty config - - return Status::Success; -} - - -const std::unordered_map backend_config_defaults( - {{"default-max-batch-size", "4"}}); - -Status -TritonModel::SetBackendConfigDefaults( - triton::common::BackendCmdlineConfig& config) -{ - auto backend_config_defaults_copy = backend_config_defaults; - - for (auto& setting : config) { - if (setting.first.compare("default-max-batch-size") == 0) { - LOG_VERBOSE(1) << "Found overwritten default setting: " << setting.first - << "," << setting.second; - backend_config_defaults_copy.erase(setting.first); - } - - if (backend_config_defaults_copy.empty()) { - break; - } - } - - // Anything left should be added to the config - for (const auto& default_setting : backend_config_defaults_copy) { - LOG_VERBOSE(1) << "Adding default backend config setting: " - << default_setting.first << "," 
<< default_setting.second; - config.push_back( - std::make_pair(default_setting.first, default_setting.second)); - } - - return Status::Success; -} - -Status -TritonModel::AddInstance( - std::unique_ptr&& instance, const bool passive) -{ - if (passive) { - passive_instances_.emplace_back(std::move(instance)); - } else { - instances_.emplace_back(std::move(instance)); - } - - return Status::Success; -} - -Status -TritonModel::UpdateModelConfig( - const uint32_t config_version, TRITONSERVER_Message* updated_config_message) -{ - const char* buffer; - size_t byte_size; - RETURN_IF_TRITONSERVER_ERROR(TRITONSERVER_MessageSerializeToJson( - updated_config_message, &buffer, &byte_size)); - inference::ModelConfig updated_config; - RETURN_IF_ERROR( - JsonToModelConfig({buffer, byte_size}, config_version, &updated_config)); - auto config = Config(); - config.set_max_batch_size(updated_config.max_batch_size()); - - auto inputs_config = config.mutable_input(); - *inputs_config = updated_config.input(); - auto outputs_config = config.mutable_output(); - *outputs_config = updated_config.output(); - - if (!config.scheduling_choice_case()) { - if (updated_config.has_dynamic_batching()) { - auto dynamic_batching_config = config.mutable_dynamic_batching(); - *dynamic_batching_config = updated_config.dynamic_batching(); - } else if (updated_config.has_sequence_batching()) { - auto sequence_batching_config = config.mutable_sequence_batching(); - *sequence_batching_config = updated_config.sequence_batching(); - } else if (updated_config.has_ensemble_scheduling()) { - auto ensemble_scheduling_config = config.mutable_ensemble_scheduling(); - *ensemble_scheduling_config = updated_config.ensemble_scheduling(); - } // else do nothing - } else if ( - config.scheduling_choice_case() != - updated_config.scheduling_choice_case()) { - return Status( - triton::common::Error::Code::INTERNAL, - (std::string("Cannot update scheduling choice from ") + - std::to_string(config.scheduling_choice_case()) + std::string(" to ") + - std::to_string(config.scheduling_choice_case()) + - std::string(" when auto-completing.")) - .c_str()); - } // else do nothing - - // Need to normalize the model configuration for - // populating missing fields. - RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability_, &config)); - - RETURN_IF_ERROR(SetModelConfig(config)); - - return Status::Success; -} - -Status -TritonModel::SetConfiguredScheduler() -{ - std::unique_ptr scheduler; - - // Need to enforce equal shape batches (i.e. non-ragged batches) if - // the model 1) allows one or more variable-size input tensors that - // are not marked as 'allow_ragged_batch' or 2) has one or more - // shape-tensor inputs. This is not needed if all input shapes are - // non-variable and if there are no shape tensors... so we don't - // enable it in that case for efficiency reasons. - std::unordered_map enforce_equal_shape_tensors; - for (const auto input : config_.input()) { - if (input.is_shape_tensor()) { - enforce_equal_shape_tensors.insert({input.name(), true}); - } else if ( - !input.allow_ragged_batch() && - (triton::common::GetElementCount(input) == -1)) { - enforce_equal_shape_tensors.insert({input.name(), false}); - } - } - - // If 'sequence_batching' is configured, then use the SequenceBatchScheduler, - // otherwise use the default DynamicBatchScheduler. 
- if (config_.has_sequence_batching()) { - // Sequence batcher - RETURN_IF_ERROR(SequenceBatchScheduler::Create( - this, enforce_equal_shape_tensors, &scheduler)); - } else if (config_.has_dynamic_batching()) { - // Dynamic batcher - RETURN_IF_ERROR(DynamicBatchScheduler::Create( - this, nullptr, 0 /*nice*/, true /* dynamic_batching_enabled */, - config_.max_batch_size(), enforce_equal_shape_tensors, - config_.dynamic_batching(), - config_.response_cache().enable() /* response_cache_enable */, - &scheduler)); - } else { - // Default scheduler. Use dynamic batch scheduler (with batching - // disabled) as the default scheduler. - RETURN_IF_ERROR(DynamicBatchScheduler::Create( - this, nullptr, 0 /*nice*/, false /* dynamic_batching_enabled */, - 1 /* max_batch_size */, - std::unordered_map< - std::string, bool>() /* enforce_equal_shape_tensors */, - false /* preserve_ordering */, - config_.response_cache().enable() /* response_cache_enable */, - std::set() /* preferred_batch_sizes */, - 0 /* max_queue_delay_microseconds */, &scheduler)); - } - - return SetScheduler(std::move(scheduler)); -} - -Status -TritonModel::Initialize() -{ - for (const auto& instance : instances_) { - RETURN_IF_ERROR(instance->Initialize()); - } - - return Status::Success; -} - -Status -TritonModel::WarmUp() -{ - for (const auto& instance : instances_) { - RETURN_IF_ERROR(instance->WarmUp()); - } - - return Status::Success; -} - -TritonModel::TritonModel( - InferenceServer* server, - const std::shared_ptr& localized_model_dir, - const std::shared_ptr& backend, - const double min_compute_capability, const int64_t version, - const inference::ModelConfig& config, const bool auto_complete_config) - : Model( - min_compute_capability, localized_model_dir->Path(), version, config), - server_(server), min_compute_capability_(min_compute_capability), - auto_complete_config_(auto_complete_config), - localized_model_dir_(localized_model_dir), backend_(backend), - state_(nullptr) -{ -} - -TritonModel::~TritonModel() -{ - // Explicitly delete/finalize all model instances before finalizing - // the model itself. - instances_.clear(); - passive_instances_.clear(); - - // Unregister itself from the rate limiter. Note this should happen - // after all instances are destructed. Destrucing instances ensures - // there are no instance threads waiting on rate limiter for - // receiving their payloads. - server_->GetRateLimiter()->UnregisterModel(this); - - // Model finalization is optional... The TRITONBACKEND_Model - // object is this TritonModel object. 
- if (backend_->ModelFiniFn() != nullptr) { - LOG_TRITONSERVER_ERROR( - backend_->ModelFiniFn()(reinterpret_cast(this)), - "failed finalizing model"); - } -} - -extern "C" { - -// -// TRITONBACKEND_Model -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelName(TRITONBACKEND_Model* model, const char** name) -{ - TritonModel* tm = reinterpret_cast(model); - *name = tm->Name().c_str(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelVersion(TRITONBACKEND_Model* model, uint64_t* version) -{ - TritonModel* tm = reinterpret_cast(model); - *version = tm->Version(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelRepository( - TRITONBACKEND_Model* model, TRITONBACKEND_ArtifactType* artifact_type, - const char** location) -{ - TritonModel* tm = reinterpret_cast(model); - *artifact_type = TRITONBACKEND_ARTIFACT_FILESYSTEM; - *location = tm->LocalizedModelPath().c_str(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelConfig( - TRITONBACKEND_Model* model, const uint32_t config_version, - TRITONSERVER_Message** model_config) -{ - TritonModel* tm = reinterpret_cast(model); - - std::string model_config_json; - Status status = - ModelConfigToJson(tm->Config(), config_version, &model_config_json); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - *model_config = reinterpret_cast( - new TritonServerMessage(std::move(model_config_json))); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelAutoCompleteConfig( - TRITONBACKEND_Model* model, bool* auto_complete_config) -{ - TritonModel* tm = reinterpret_cast(model); - *auto_complete_config = tm->AutoCompleteConfig(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelSetConfig( - TRITONBACKEND_Model* model, const uint32_t config_version, - TRITONSERVER_Message* model_config) -{ - TritonModel* tm = reinterpret_cast(model); - Status status = tm->UpdateModelConfig(config_version, model_config); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelServer( - TRITONBACKEND_Model* model, TRITONSERVER_Server** server) -{ - TritonModel* tm = reinterpret_cast(model); - *server = reinterpret_cast(tm->Server()); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelBackend( - TRITONBACKEND_Model* model, TRITONBACKEND_Backend** backend) -{ - TritonModel* tm = reinterpret_cast(model); - *backend = reinterpret_cast(tm->Backend().get()); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelState(TRITONBACKEND_Model* model, void** state) -{ - TritonModel* tm = reinterpret_cast(model); - *state = tm->State(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelSetState(TRITONBACKEND_Model* model, void* state) -{ - TritonModel* tm = reinterpret_cast(model); - tm->SetState(state); - return nullptr; // success -} - -/// -/// TRITONBACKEND_Request -/// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestId(TRITONBACKEND_Request* request, const char** id) -{ - InferenceRequest* tr = reinterpret_cast(request); - *id = tr->Id().c_str(); - return nullptr; // 
success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestCorrelationId(TRITONBACKEND_Request* request, uint64_t* id) -{ - InferenceRequest* tr = reinterpret_cast(request); - const InferenceRequest::SequenceId& correlation_id = tr->CorrelationId(); - if (correlation_id.Type() != InferenceRequest::SequenceId::DataType::UINT64) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (tr->LogRequest() + "correlation ID in request is not an unsigned int") - .c_str()); - } - *id = correlation_id.UnsignedIntValue(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestFlags(TRITONBACKEND_Request* request, uint32_t* flags) -{ - InferenceRequest* tr = reinterpret_cast(request); - *flags = tr->Flags(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestCorrelationIdString( - TRITONBACKEND_Request* request, const char** id) -{ - InferenceRequest* tr = reinterpret_cast(request); - const InferenceRequest::SequenceId& correlation_id = tr->CorrelationId(); - if (correlation_id.Type() != InferenceRequest::SequenceId::DataType::STRING) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (tr->LogRequest() + "correlation ID in request is not a string") - .c_str()); - } - *id = correlation_id.StringValue().c_str(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestInputCount(TRITONBACKEND_Request* request, uint32_t* count) -{ - InferenceRequest* tr = reinterpret_cast(request); - *count = tr->ImmutableInputs().size(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestInputName( - TRITONBACKEND_Request* request, const uint32_t index, - const char** input_name) -{ - *input_name = nullptr; - - InferenceRequest* tr = reinterpret_cast(request); - const auto& inputs = tr->ImmutableInputs(); - if (index >= inputs.size()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (tr->LogRequest() + "out of bounds index " + std::to_string(index) + - ": request has " + std::to_string(inputs.size()) + " inputs") - .c_str()); - } - - // The request inputs are not allowed to change once the request - // makes it to the backend, so it is ok to just iterate through the - // map. This linear search is the best we can do given the need for - // the inputs to be in a map and given the typical small number of - // inputs is better than having every request maintain the inputs as - // both map and vector. 
- uint32_t cnt = 0; - for (const auto& pr : inputs) { - if (cnt++ == index) { - InferenceRequest::Input* in = pr.second; - *input_name = in->Name().c_str(); - break; - } - } - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestInput( - TRITONBACKEND_Request* request, const char* name, - TRITONBACKEND_Input** input) -{ - InferenceRequest* tr = reinterpret_cast(request); - const auto& inputs = tr->ImmutableInputs(); - const auto& itr = inputs.find(name); - if (itr == inputs.end()) { - *input = nullptr; - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (tr->LogRequest() + "unknown request input name " + name).c_str()); - } - - InferenceRequest::Input* in = itr->second; - *input = reinterpret_cast(in); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestInputByIndex( - TRITONBACKEND_Request* request, const uint32_t index, - TRITONBACKEND_Input** input) -{ - InferenceRequest* tr = reinterpret_cast(request); - const auto& inputs = tr->ImmutableInputs(); - if (index >= inputs.size()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (tr->LogRequest() + "out of bounds index " + std::to_string(index) + - ": request has " + std::to_string(inputs.size()) + " inputs") - .c_str()); - } - - // The request inputs are not allowed to change once the request - // makes it to the backend, so it is ok to just iterate through the - // map. This linear search is the best we can do given the need for - // the inputs to be in a map and given the typical small number of - // inputs is better than having every request maintain the inputs as - // both map and vector. - uint32_t cnt = 0; - for (const auto& pr : inputs) { - if (cnt++ == index) { - InferenceRequest::Input* in = pr.second; - *input = reinterpret_cast(in); - break; - } - } - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestOutputCount( - TRITONBACKEND_Request* request, uint32_t* count) -{ - InferenceRequest* tr = reinterpret_cast(request); - *count = tr->ImmutableRequestedOutputs().size(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestOutputName( - TRITONBACKEND_Request* request, const uint32_t index, - const char** output_name) -{ - *output_name = nullptr; - - InferenceRequest* tr = reinterpret_cast(request); - const auto& routputs = tr->ImmutableRequestedOutputs(); - if (index >= routputs.size()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (tr->LogRequest() + "out of bounds index " + std::to_string(index) + - ": request has " + std::to_string(routputs.size()) + - " requested outputs") - .c_str()); - } - - // The requested outputs are not allowed to change once the request - // makes it to the backend, so it is ok to just iterate through the - // set. This linear search is the best we can do given the requested - // outputs being in a set and given the typical small number of - // requested outputs it should not be a performance issue. 
- uint32_t cnt = 0; - for (const auto& rout : routputs) { - if (cnt++ == index) { - *output_name = rout.c_str(); - break; - } - } - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestOutputBufferProperties( - TRITONBACKEND_Request* request, const char* name, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - InferenceRequest* tr = reinterpret_cast(request); - auto status = - tr->OutputBufferProperties(name, byte_size, memory_type, memory_type_id); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_RequestRelease( - TRITONBACKEND_Request* request, uint32_t release_flags) -{ - InferenceRequest* tr = reinterpret_cast(request); - std::unique_ptr ur(tr); - InferenceRequest::Release(std::move(ur), release_flags); - return nullptr; // success -} - -/// -/// TRITONBACKEND_State -/// - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_StateUpdate(TRITONBACKEND_State* state) -{ - SequenceState* ts = reinterpret_cast(state); - auto status = ts->Update(); - - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_StateNew( - TRITONBACKEND_State** state, TRITONBACKEND_Request* request, - const char* name, const TRITONSERVER_DataType datatype, - const int64_t* shape, const uint32_t dims_count) -{ - InferenceRequest* tr = reinterpret_cast(request); - SequenceState* lstate; - std::vector lshape(shape, shape + dims_count); - auto& sequence_state = tr->GetSequenceStates(); - - if (sequence_state == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("unable to add state '") + name + - "'. State configuration is missing for model '" + tr->ModelName() + - "'.") - .c_str()); - } - - Status status = sequence_state->OutputState( - name, TritonToDataType(datatype), lshape, &lstate); - if (!status.IsOk()) { - *state = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - *state = reinterpret_cast(lstate); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_StateBuffer( - TRITONBACKEND_State* state, void** buffer, const uint64_t buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - SequenceState* to = reinterpret_cast(state); - Status status = Status::Success; - - // If the buffer size exactly matches the buffer available, reuse the - // currently allocated buffer. - if (to->Data()->TotalByteSize() == buffer_byte_size) { - const std::shared_ptr& memory = - reinterpret_cast&>(to->Data()); - - TRITONSERVER_MemoryType current_memory_type; - int64_t current_memory_type_id; - void* lbuffer = - memory->MutableBuffer(¤t_memory_type, ¤t_memory_type_id); - - // If the requested memory type doesn't match the current buffer, allocate a - // new buffer with the requested memory type and memory type id. 
- if (current_memory_type == *memory_type && - current_memory_type_id == *memory_type_id) { - *buffer = lbuffer; - } else { - std::shared_ptr memory = - std::make_shared( - buffer_byte_size, *memory_type, *memory_type_id); - *buffer = memory->MutableBuffer(memory_type, memory_type_id); - to->RemoveAllData(); - status = to->SetData(memory); - } - } else { - std::shared_ptr memory = std::make_shared( - buffer_byte_size, *memory_type, *memory_type_id); - *buffer = memory->MutableBuffer(memory_type, memory_type_id); - to->RemoveAllData(); - status = to->SetData(memory); - } - - if (!status.IsOk()) { - *buffer = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_StateBufferAttributes( - TRITONBACKEND_State* state, - TRITONSERVER_BufferAttributes** buffer_attributes) -{ - SequenceState* to = reinterpret_cast(state); - to->Data()->BufferAt( - 0, reinterpret_cast(buffer_attributes)); - - return nullptr; // success -} - -// -// TRITONBACKEND_ResponseFactory -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseFactoryNew( - TRITONBACKEND_ResponseFactory** factory, TRITONBACKEND_Request* request) -{ - InferenceRequest* tr = reinterpret_cast(request); - std::shared_ptr* response_factory = - new std::shared_ptr(tr->ResponseFactory()); - - *factory = reinterpret_cast(response_factory); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseFactoryDelete(TRITONBACKEND_ResponseFactory* factory) -{ - std::shared_ptr* response_factory = - reinterpret_cast*>(factory); - delete response_factory; - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseFactorySendFlags( - TRITONBACKEND_ResponseFactory* factory, const uint32_t send_flags) -{ - std::shared_ptr* response_factory = - reinterpret_cast*>(factory); - Status status = (*response_factory)->SendFlags(send_flags); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -/// -/// TRITONBACKEND_Response -/// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseNew( - TRITONBACKEND_Response** response, TRITONBACKEND_Request* request) -{ - InferenceRequest* tr = reinterpret_cast(request); - - std::unique_ptr tresp; - Status status = tr->ResponseFactory()->CreateResponse(&tresp); - if (!status.IsOk()) { - *response = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - *response = reinterpret_cast(tresp.release()); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseNewFromFactory( - TRITONBACKEND_Response** response, TRITONBACKEND_ResponseFactory* factory) -{ - std::shared_ptr* response_factory = - reinterpret_cast*>(factory); - - std::unique_ptr tr; - Status status = (*response_factory)->CreateResponse(&tr); - if (!status.IsOk()) { - *response = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - *response = reinterpret_cast(tr.release()); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseDelete(TRITONBACKEND_Response* response) -{ - InferenceResponse* tr = reinterpret_cast(response); - delete tr; - return nullptr; // success -} - -TRITONAPI_DECLSPEC 
TRITONSERVER_Error* -TRITONBACKEND_ResponseSetStringParameter( - TRITONBACKEND_Response* response, const char* name, const char* value) -{ - InferenceResponse* tr = reinterpret_cast(response); - Status status = tr->AddParameter(name, value); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseSetIntParameter( - TRITONBACKEND_Response* response, const char* name, const int64_t value) -{ - InferenceResponse* tr = reinterpret_cast(response); - Status status = tr->AddParameter(name, value); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseSetBoolParameter( - TRITONBACKEND_Response* response, const char* name, const bool value) -{ - InferenceResponse* tr = reinterpret_cast(response); - Status status = tr->AddParameter(name, value); - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response* response, TRITONBACKEND_Output** output, - const char* name, const TRITONSERVER_DataType datatype, - const int64_t* shape, const uint32_t dims_count) -{ - InferenceResponse* tr = reinterpret_cast(response); - std::vector lshape(shape, shape + dims_count); - InferenceResponse::Output* loutput; - Status status = tr->AddOutput( - name, TritonToDataType(datatype), std::move(lshape), &loutput); - if (!status.IsOk()) { - *output = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - *output = reinterpret_cast(loutput); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ResponseSend( - TRITONBACKEND_Response* response, const uint32_t send_flags, - TRITONSERVER_Error* error) -{ - InferenceResponse* tr = reinterpret_cast(response); - - Status status; - - std::unique_ptr utr(tr); - if (error == nullptr) { - status = InferenceResponse::Send(std::move(utr), send_flags); - } else { - status = InferenceResponse::SendWithStatus( - std::move(utr), send_flags, - Status( - TritonCodeToStatusCode(TRITONSERVER_ErrorCode(error)), - TRITONSERVER_ErrorMessage(error))); - } - - if (!status.IsOk()) { - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - - return nullptr; // success -} - -/// -/// TRITONBACKEND_Input -/// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputProperties( - TRITONBACKEND_Input* input, const char** name, - TRITONSERVER_DataType* datatype, const int64_t** shape, - uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count) -{ - InferenceRequest::Input* ti = - reinterpret_cast(input); - if (name != nullptr) { - *name = ti->Name().c_str(); - } - if (datatype != nullptr) { - *datatype = DataTypeToTriton(ti->DType()); - } - if (shape != nullptr) { - *shape = ti->ShapeWithBatchDim().data(); - } - if (dims_count != nullptr) { - *dims_count = ti->ShapeWithBatchDim().size(); - } - if (byte_size != nullptr) { - *byte_size = ti->Data()->TotalByteSize(); - } - if (buffer_count != nullptr) { - *buffer_count = ti->DataBufferCount(); - } - return nullptr; // success -} - 
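// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the deleted Triton sources] The input
// accessors implemented in this file are normally driven from a backend's
// TRITONBACKEND_ModelInstanceExecute. A minimal, hedged illustration of that
// call pattern follows: the helper name ReadAllInputs and the include path
// are our assumptions, only the TRITONBACKEND_* and TRITONSERVER_* calls come
// from the API in this diff, and error handling is reduced to early returns.

#include "triton/core/tritonbackend.h"  // assumed header path for the TRITONBACKEND_* API

static TRITONSERVER_Error*
ReadAllInputs(TRITONBACKEND_Request* request)
{
  uint32_t input_count = 0;
  TRITONSERVER_Error* err =
      TRITONBACKEND_RequestInputCount(request, &input_count);
  if (err != nullptr) {
    return err;
  }

  for (uint32_t i = 0; i < input_count; ++i) {
    TRITONBACKEND_Input* input = nullptr;
    err = TRITONBACKEND_RequestInputByIndex(request, i, &input);
    if (err != nullptr) {
      return err;
    }

    const char* name = nullptr;
    TRITONSERVER_DataType dtype;
    const int64_t* shape = nullptr;
    uint32_t dims_count = 0;
    uint64_t byte_size = 0;
    uint32_t buffer_count = 0;
    err = TRITONBACKEND_InputProperties(
        input, &name, &dtype, &shape, &dims_count, &byte_size, &buffer_count);
    if (err != nullptr) {
      return err;
    }

    // An input may be split across several buffers, possibly held in
    // different memory types (CPU, pinned, GPU).
    for (uint32_t b = 0; b < buffer_count; ++b) {
      const void* buffer = nullptr;
      uint64_t buffer_byte_size = 0;
      TRITONSERVER_MemoryType memory_type = TRITONSERVER_MEMORY_CPU;
      int64_t memory_type_id = 0;
      err = TRITONBACKEND_InputBuffer(
          input, b, &buffer, &buffer_byte_size, &memory_type, &memory_type_id);
      if (err != nullptr) {
        return err;
      }
      // ... consume (name, dtype, shape, buffer, buffer_byte_size) here ...
    }
  }
  return nullptr;  // success
}
// ---------------------------------------------------------------------------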
-TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input* input, const char* host_policy_name, const char** name, - TRITONSERVER_DataType* datatype, const int64_t** shape, - uint32_t* dims_count, uint64_t* byte_size, uint32_t* buffer_count) -{ - InferenceRequest::Input* ti = - reinterpret_cast(input); - if (name != nullptr) { - *name = ti->Name().c_str(); - } - if (datatype != nullptr) { - *datatype = DataTypeToTriton(ti->DType()); - } - if (shape != nullptr) { - *shape = ti->ShapeWithBatchDim().data(); - } - if (dims_count != nullptr) { - *dims_count = ti->ShapeWithBatchDim().size(); - } - if (host_policy_name != nullptr) { - if (byte_size != nullptr) { - *byte_size = ti->Data(host_policy_name)->TotalByteSize(); - } - if (buffer_count != nullptr) { - *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name); - } - } else { - if (byte_size != nullptr) { - *byte_size = ti->Data()->TotalByteSize(); - } - if (buffer_count != nullptr) { - *buffer_count = ti->DataBufferCount(); - } - } - return nullptr; // success -} - - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputBuffer( - TRITONBACKEND_Input* input, const uint32_t index, const void** buffer, - uint64_t* buffer_byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) -{ - InferenceRequest::Input* ti = - reinterpret_cast(input); - Status status = ti->DataBuffer( - index, buffer, buffer_byte_size, memory_type, memory_type_id); - if (!status.IsOk()) { - *buffer = nullptr; - *buffer_byte_size = 0; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputBufferAttributes( - TRITONBACKEND_Input* input, const uint32_t index, const void** buffer, - TRITONSERVER_BufferAttributes** buffer_attributes) -{ - InferenceRequest::Input* ti = - reinterpret_cast(input); - Status status = ti->DataBufferAttributes( - index, buffer, reinterpret_cast(buffer_attributes)); - if (!status.IsOk()) { - *buffer = nullptr; - *buffer_attributes = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input* input, const char* host_policy_name, - const uint32_t index, const void** buffer, uint64_t* buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - InferenceRequest::Input* ti = - reinterpret_cast(input); - - Status status = - (host_policy_name == nullptr) - ? 
ti->DataBuffer( - index, buffer, buffer_byte_size, memory_type, memory_type_id) - : ti->DataBufferForHostPolicy( - index, buffer, buffer_byte_size, memory_type, memory_type_id, - host_policy_name); - if (!status.IsOk()) { - *buffer = nullptr; - *buffer_byte_size = 0; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -/// -/// TRITONBACKEND_Output -/// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_OutputBuffer( - TRITONBACKEND_Output* output, void** buffer, - const uint64_t buffer_byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) -{ - InferenceResponse::Output* to = - reinterpret_cast(output); - Status status = to->AllocateDataBuffer( - buffer, buffer_byte_size, memory_type, memory_type_id); - if (!status.IsOk()) { - *buffer = nullptr; - return TRITONSERVER_ErrorNew( - StatusCodeToTritonCode(status.StatusCode()), status.Message().c_str()); - } - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_OutputBufferAttributes( - TRITONBACKEND_Output* output, - TRITONSERVER_BufferAttributes** buffer_attributes) -{ - InferenceResponse::Output* to = - reinterpret_cast(output); - - *buffer_attributes = reinterpret_cast( - to->GetBufferAttributes()); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup( - TRITONBACKEND_BackendAttribute* backend_attributes, - const TRITONSERVER_InstanceGroupKind kind, const uint64_t count, - const uint64_t* device_ids, const uint64_t id_count) -{ - auto ba = reinterpret_cast(backend_attributes); - ba->preferred_groups_.emplace_back(); - auto& pg = ba->preferred_groups_.back(); - switch (kind) { - case TRITONSERVER_INSTANCEGROUPKIND_AUTO: - pg.set_kind(inference::ModelInstanceGroup::KIND_AUTO); - break; - case TRITONSERVER_INSTANCEGROUPKIND_CPU: - pg.set_kind(inference::ModelInstanceGroup::KIND_CPU); - break; - case TRITONSERVER_INSTANCEGROUPKIND_GPU: - pg.set_kind(inference::ModelInstanceGroup::KIND_GPU); - break; - case TRITONSERVER_INSTANCEGROUPKIND_MODEL: - pg.set_kind(inference::ModelInstanceGroup::KIND_MODEL); - break; - } - pg.set_count(count); - if (device_ids != nullptr) { - for (size_t i = 0; i < id_count; ++i) { - pg.add_gpus(device_ids[i]); - } - } - return nullptr; -} - -} // extern C - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_model.h b/3rdparty/core-r22.12/src/backend_model.h deleted file mode 100644 index 4e3941eb278bb826837976fb2f34d415ea85b47b..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_model.h +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include "backend_manager.h" -#include "filesystem.h" -#include "infer_request.h" -#include "model.h" -#include "model_config.pb.h" -#include "status.h" - -namespace triton { namespace core { - -class InferenceServer; -class TritonModelInstance; - -// -// Represents a model. -// -// Inheriting from Model to implement backend APIs -// -class TritonModel : public Model { - public: - static Status Create( - InferenceServer* server, const std::string& model_path, - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map, - const std::string& model_name, const int64_t version, - inference::ModelConfig model_config, const bool is_config_provided, - std::unique_ptr* model); - ~TritonModel(); - - const std::string& LocalizedModelPath() const - { - return localized_model_dir_->Path(); - } - InferenceServer* Server() { return server_; } - bool AutoCompleteConfig() const { return auto_complete_config_; } - Status UpdateModelConfig( - const uint32_t config_version, - TRITONSERVER_Message* updated_config_message); - const std::shared_ptr& Backend() const { return backend_; } - const std::vector>& Instances() const - { - return instances_; - } - void* State() { return state_; } - void SetState(void* state) { state_ = state; } - Status AddInstance( - std::unique_ptr&& instance, const bool passive); - - private: - DISALLOW_COPY_AND_ASSIGN(TritonModel); - - TritonModel( - InferenceServer* server, - const std::shared_ptr& localized_model_dir, - const std::shared_ptr& backend, - const double min_compute_capability, const int64_t version, - const inference::ModelConfig& config, const bool auto_complete_config); - - // Set the scheduler based on the model configuration. The scheduler - // can only be set once for a backend. - Status SetConfiguredScheduler(); - - // Merges the global backend configs with the specific - // backend configs. - static Status ResolveBackendConfigs( - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const std::string& backend_name, - triton::common::BackendCmdlineConfig& config); - - // Sets defaults for some backend configurations when none are specified on - // the command line. - static Status SetBackendConfigDefaults( - triton::common::BackendCmdlineConfig& config); - - Status Initialize(); - Status WarmUp(); - - // The server object that owns this model. The model holds this as a - // raw pointer because the lifetime of the server is guaranteed to - // be longer than the lifetime of a model owned by the server. - InferenceServer* server_; - - // The minimum supported compute capability on device. 
- const double min_compute_capability_; - - // Whether the backend should attempt to auto-complete the model config. - const bool auto_complete_config_; - - // The localized repo directory holding the model. If localization - // required creation of a temporary local copy then that copy will - // persist as along as this object is retained by this model. - std::shared_ptr localized_model_dir_; - - // Backend used by this model. - std::shared_ptr backend_; - - // The model instances for this model. - std::vector> instances_; - std::vector> passive_instances_; - - // Opaque state associated with this model. - void* state_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/backend_model_instance.cc b/3rdparty/core-r22.12/src/backend_model_instance.cc deleted file mode 100644 index d91452eccdd4dcfedf3124fd06d87207e836fe06..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/backend_model_instance.cc +++ /dev/null @@ -1,966 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "backend_model_instance.h" - -#ifndef _WIN32 -#include -#include -#include -#endif -#include "backend_config.h" -#include "backend_model.h" -#include "cuda_utils.h" -#include "metrics.h" -#include "model_config.pb.h" -#include "numa_utils.h" -#include "server.h" -#include "shared_library.h" -#include "triton/common/logging.h" -#include "triton/common/nvtx.h" -#include "tritonserver_apis.h" - -// For unknown reason, windows will not export the TRITONBACKEND_* -// functions declared with dllexport in tritonbackend.h. To get those -// functions exported it is (also?) necessary to mark the definitions -// in this file with dllexport as well. 
-#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -namespace triton { namespace core { - -namespace { -// Utilities for warmup feature -TRITONSERVER_Error* -WarmupResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id) -{ - *buffer = malloc(byte_size); - if (*buffer != nullptr) { - *actual_memory_type = TRITONSERVER_MEMORY_CPU; - *actual_memory_type_id = 0; - return nullptr; - } - - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "failed to allocate output buffer for warmup."); -} - -TRITONSERVER_Error* -WarmupResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - free(buffer); - return nullptr; -} - -ResponseAllocator warmup_allocator = ResponseAllocator( - WarmupResponseAlloc, WarmupResponseRelease, nullptr /* start_fn */); - -void -WarmupResponseComplete( - TRITONSERVER_InferenceResponse* iresponse, const uint32_t flags, - void* userp) -{ - auto res_pair = reinterpret_cast< - std::pair, std::vector*>*>(userp); - if (iresponse != nullptr) { - auto err = TRITONSERVER_InferenceResponseError(iresponse); - if (err != nullptr) { - // The error vector is shared by all requests in the batch for now - static std::mutex res_mtx; - { - std::lock_guard lk(res_mtx); - res_pair->second->emplace_back(TRITONSERVER_ErrorMessage(err)); - } - TRITONSERVER_ErrorDelete(err); - } - // Just delete the response, warmup doesn't check for correctness - LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceResponseDelete(iresponse), - "deleting warmup response"); - } - // Last response - if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != 0) { - res_pair->first.set_value(); - } -} - -void -WarmupRequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) -{ - if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) { - // Don't need to release request here, it is managed in WarmupData - if (userp != nullptr) { - auto warmup_promise = reinterpret_cast*>(userp); - warmup_promise->set_value(); - } - } -} - -} // namespace - -TritonModelInstance::TritonModelInstance( - TritonModel* model, const std::string& name, const size_t index, - const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id, - const std::vector& profile_names, const bool passive, - const triton::common::HostPolicyCmdlineConfig& host_policy, - const TritonServerMessage& host_policy_message, - const std::vector& secondary_devices) - : model_(model), name_(name), index_(index), kind_(kind), - device_id_(device_id), host_policy_(host_policy), - host_policy_message_(host_policy_message), profile_names_(profile_names), - passive_(passive), secondary_devices_(secondary_devices), state_(nullptr) -{ -#ifdef TRITON_ENABLE_METRICS - if (Metrics::Enabled()) { - // Use an ID in the metric only for GPU instances. Otherwise use - // METRIC_REPORTER_ID_CPU to indicate no device should be reported in the - // metric. - const int id = (kind_ == TRITONSERVER_INSTANCEGROUPKIND_GPU) - ? 
device_id_ - : METRIC_REPORTER_ID_CPU; - MetricModelReporter::Create( - model_->Name(), model_->Version(), id, model_->Config().metric_tags(), - &reporter_); - } -#endif // TRITON_ENABLE_METRICS -} - -TritonModelInstance::~TritonModelInstance() -{ - if (triton_backend_thread_.get() != nullptr) { - triton_backend_thread_->StopBackendThread(); - } - - // Model finalization is optional... - if (model_->Backend()->ModelInstanceFiniFn() != nullptr) { - LOG_TRITONSERVER_ERROR( - model_->Backend()->ModelInstanceFiniFn()( - reinterpret_cast(this)), - "failed finalizing model instance"); - } -} - -Status -TritonModelInstance::CreateInstances( - TritonModel* model, - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map, - const inference::ModelConfig& model_config, const bool device_blocking) -{ - static triton::common::HostPolicyCmdlineConfig empty_host_policy; - - // This structure is used to allocate TritonBackendThread to instances on same - // device for device blocking execution policy. - std::map> device_to_thread_map; - - for (const auto& group : model_config.instance_group()) { - std::vector profile_names; - for (const auto& profile_name : group.profile()) { - profile_names.push_back(profile_name); - } - std::vector secondary_devices; - for (const auto& secondary_device : group.secondary_devices()) { - secondary_devices.emplace_back( - inference:: - ModelInstanceGroup_SecondaryDevice_SecondaryDeviceKind_Name( - secondary_device.kind()), - secondary_device.device_id()); - } - for (int32_t c = 0; c < group.count(); ++c) { - std::string instance_name{group.count() > 1 - ? group.name() + "_" + std::to_string(c) - : group.name()}; - const bool passive = group.passive(); - std::vector> - instance_setting; - if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) { - instance_setting.emplace_back( - group.host_policy().empty() ? "cpu" : group.host_policy(), - TRITONSERVER_INSTANCEGROUPKIND_CPU, 0 /* device_id */, - &group.rate_limiter()); - } else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) { - for (const int32_t device_id : group.gpus()) { - instance_setting.emplace_back( - group.host_policy().empty() ? ("gpu_" + std::to_string(device_id)) - : group.host_policy(), - TRITONSERVER_INSTANCEGROUPKIND_GPU, device_id, - &group.rate_limiter()); - } - } else if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) { - instance_setting.emplace_back( - group.host_policy().empty() ? 
"model" : group.host_policy(), - TRITONSERVER_INSTANCEGROUPKIND_MODEL, 0 /* device_id */, - &group.rate_limiter()); - } else { - return Status( - Status::Code::INVALID_ARG, - std::string("instance_group kind ") + - ModelInstanceGroup_Kind_Name(group.kind()) + " not supported"); - } - for (const auto is : instance_setting) { - const auto& kind = std::get<1>(is); - const auto& id = std::get<2>(is); - - const std::string& policy_name = std::get<0>(is); - const triton::common::HostPolicyCmdlineConfig* host_policy; - const auto policy_it = host_policy_map.find(policy_name); - if (policy_it != host_policy_map.end()) { - host_policy = &policy_it->second; - } else { - host_policy = &empty_host_policy; - } - RETURN_IF_ERROR(SetNumaConfigOnThread(*host_policy)); - auto err = CreateInstance( - model, instance_name, c, kind, id, profile_names, passive, - policy_name, *host_policy, *(std::get<3>(is)), device_blocking, - &device_to_thread_map, secondary_devices); - RETURN_IF_ERROR(ResetNumaMemoryPolicy()); - RETURN_IF_ERROR(err); - - // When deploying on GPU, we want to make sure the GPU memory usage - // is within allowed range, otherwise, stop the creation to ensure - // there is sufficient GPU memory for other use. - // We check the usage after loading the instance to better enforcing - // the limit. If we check before loading, we may create instance - // that occupies the rest of available memory which against the purpose - if (kind == TRITONSERVER_INSTANCEGROUPKIND_GPU) { - size_t free, total; - double memory_limit; - RETURN_IF_ERROR(GetDeviceMemoryInfo(id, &free, &total)); - RETURN_IF_ERROR(BackendConfigurationModelLoadGpuFraction( - backend_cmdline_config_map, id, &memory_limit)); - const size_t allow = total * memory_limit; - const size_t used = total - free; - if (used > allow) { - return Status( - Status::Code::UNAVAILABLE, - std::string("can not create model '") + instance_name + - "': memory limit set for " + - TRITONSERVER_InstanceGroupKindString(kind) + " " + - std::to_string(id) + - " has exceeded, model loading is rejected."); - } - } - } - } - } - - return Status::Success; -} - -Status -TritonModelInstance::CreateInstance( - TritonModel* model, const std::string& name, const size_t index, - const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id, - const std::vector& profile_names, const bool passive, - const std::string& host_policy_name, - const triton::common::HostPolicyCmdlineConfig& host_policy, - const inference::ModelRateLimiter& rate_limiter_config, - const bool device_blocking, - std::map>* - device_to_thread_map, - const std::vector& secondary_devices) -{ - // Create the JSON representation of the backend configuration. - triton::common::TritonJson::Value host_policy_json( - triton::common::TritonJson::ValueType::OBJECT); - triton::common::TritonJson::Value policy_setting_json( - host_policy_json, triton::common::TritonJson::ValueType::OBJECT); - for (const auto& pr : host_policy) { - RETURN_IF_ERROR(policy_setting_json.AddString(pr.first.c_str(), pr.second)); - } - - RETURN_IF_ERROR(host_policy_json.Add( - host_policy_name.c_str(), std::move(policy_setting_json))); - TritonServerMessage host_policy_message(host_policy_json); - - std::unique_ptr local_instance(new TritonModelInstance( - model, name, index, kind, device_id, profile_names, passive, host_policy, - host_policy_message, secondary_devices)); - - TRITONBACKEND_ModelInstance* triton_instance = - reinterpret_cast(local_instance.get()); - - // Instance initialization is optional... 
We must set set shared - // library path to point to the backend directory in case the - // backend library attempts to load additional shared libaries. - if (model->Backend()->ModelInstanceInitFn() != nullptr) { - std::unique_ptr slib; - RETURN_IF_ERROR(SharedLibrary::Acquire(&slib)); - RETURN_IF_ERROR(slib->SetLibraryDirectory(model->Backend()->Directory())); - - TRITONSERVER_Error* err = - model->Backend()->ModelInstanceInitFn()(triton_instance); - - RETURN_IF_ERROR(slib->ResetLibraryDirectory()); - RETURN_IF_TRITONSERVER_ERROR(err); - } - - if (!passive) { - RETURN_IF_ERROR(local_instance->GenerateWarmupData()); - RETURN_IF_ERROR(model->Server()->GetRateLimiter()->RegisterModelInstance( - local_instance.get(), rate_limiter_config)); - RETURN_IF_ERROR(local_instance->SetBackendThread( - kind, device_id, device_blocking, device_to_thread_map)); - } - - RETURN_IF_ERROR(model->AddInstance(std::move(local_instance), passive)); - - return Status::Success; -} - -Status -TritonModelInstance::SetBackendThread( - const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id, - const bool device_blocking, - std::map>* - device_to_thread_map) -{ - if (device_blocking && (kind == TRITONSERVER_INSTANCEGROUPKIND_GPU)) { - auto thread_it = device_to_thread_map->find(device_id); - if (thread_it != device_to_thread_map->end()) { - LOG_VERBOSE(1) << "Using already started backend thread for " << Name() - << " on device " << device_id; - triton_backend_thread_ = thread_it->second; - } - } - if (triton_backend_thread_.get() == nullptr) { - std::unique_ptr local_backend_thread; - RETURN_IF_ERROR(TritonBackendThread::CreateBackendThread( - Name(), this, 0 /* nice */, device_id, &local_backend_thread)); - triton_backend_thread_ = std::move(local_backend_thread); - device_to_thread_map->insert({device_id, triton_backend_thread_}); - } else { - triton_backend_thread_->AddModelInstance(this); - } - RETURN_IF_ERROR(triton_backend_thread_->InitAndWarmUpModelInstance(this)); - - return Status::Success; -} - -Status -TritonModelInstance::GenerateWarmupData() -{ - warmup_samples_.clear(); - for (const auto& warmup_setting : model_->Config().model_warmup()) { - if (warmup_setting.batch_size() == 0) { - LOG_VERBOSE(1) << "Skipping batch 0 warmup sample '" - << warmup_setting.name() << "'"; - continue; - } - LOG_VERBOSE(1) << "Generating warmup sample data for '" - << warmup_setting.name() << "'"; - - // Two passes. First pass to get max byte size for synthetic - // data. Second pass to add original inputs and override inputs - // for control inputs. 
- int64_t max_zero_byte_size = 0; - int64_t max_random_byte_size = 0; - for (const auto& input_meta : warmup_setting.inputs()) { - auto element_count = - triton::common::GetElementCount(input_meta.second.dims()); - if (element_count == -1) { - return Status( - Status::Code::INVALID_ARG, - "warmup setting expects all variable-size dimensions are specified " - "for input '" + - input_meta.first + "'"); - } - - int64_t batch_byte_size = - element_count * - triton::common::GetDataTypeByteSize(input_meta.second.data_type()); - if (batch_byte_size == 0) { - batch_byte_size = element_count * sizeof(int32_t); - } - - switch (input_meta.second.input_data_type_case()) { - case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData: - max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size); - break; - case inference::ModelWarmup_Input::InputDataTypeCase::kRandomData: { - // Because Triton expects STRING type to be in special format - // (prepend 4 bytes to specify string length), so using zero data - // for simplicity (4 bytes * element count of zeros). - if (input_meta.second.data_type() == - inference::DataType::TYPE_STRING) { - max_zero_byte_size = std::max(batch_byte_size, max_zero_byte_size); - } else { - max_random_byte_size = - std::max(batch_byte_size, max_random_byte_size); - } - break; - } - default: - break; - } - } - - warmup_samples_.emplace_back(warmup_setting.name(), warmup_setting.count()); - auto& warmup_data = warmup_samples_.back(); - // Create buffers for synthetic data - TRITONSERVER_MemoryType type; - int64_t type_id; - warmup_data.zero_data_.reset(new AllocatedMemory( - max_zero_byte_size, TRITONSERVER_MEMORY_CPU_PINNED /* memory_type */, - 0 /* memory_type_id */)); - char* zero_buffer = warmup_data.zero_data_->MutableBuffer(&type, &type_id); - memset(zero_buffer, 0, max_zero_byte_size); - - warmup_data.random_data_.reset(new AllocatedMemory( - max_random_byte_size, TRITONSERVER_MEMORY_CPU_PINNED /* memory_type */, - 0 /* memory_type_id */)); - char* random_buffer = - warmup_data.random_data_->MutableBuffer(&type, &type_id); - for (int64_t offset = 0; offset < max_random_byte_size; offset++) { - random_buffer[offset] = rand(); - } - - // Prepare the inference request for the specified sample, not using - // in-process C API because the request doesn't go through the same pipeline - // (i.e. no normalization / scheduler) so we need to prepare the request to - // the state just before calling instance execute function. - for (size_t cnt = 0; cnt < warmup_setting.batch_size(); cnt++) { - warmup_data.requests_.emplace_back( - new InferenceRequest(model_, model_->Version())); - auto& lrequest = warmup_data.requests_.back(); - - // Second pass to prepare original inputs. 
- std::vector> input_sps; - for (const auto& input_meta : warmup_setting.inputs()) { - auto batch1_element_count = - triton::common::GetElementCount(input_meta.second.dims()); - auto batch_byte_size = - batch1_element_count * - triton::common::GetDataTypeByteSize(input_meta.second.data_type()); - if (batch_byte_size == 0) { - batch_byte_size = batch1_element_count * sizeof(int32_t); - } - - const char* allocated_ptr; - switch (input_meta.second.input_data_type_case()) { - case inference::ModelWarmup_Input::InputDataTypeCase::kZeroData: - allocated_ptr = zero_buffer; - break; - case inference::ModelWarmup_Input::InputDataTypeCase::kRandomData: { - if (input_meta.second.data_type() == - inference::DataType::TYPE_STRING) { - allocated_ptr = zero_buffer; - } else { - allocated_ptr = random_buffer; - } - break; - } - case inference::ModelWarmup_Input::InputDataTypeCase:: - kInputDataFile: { - // For data provided from file, we can set buffer in first pass - warmup_data.provided_data_.emplace_back(new std::string()); - auto input_data = warmup_data.provided_data_.back().get(); - RETURN_IF_ERROR(ReadTextFile( - JoinPath({model_->LocalizedModelPath(), kWarmupDataFolder, - input_meta.second.input_data_file()}), - input_data)); - if (input_meta.second.data_type() == - inference::DataType::TYPE_STRING) { - batch_byte_size = input_data->size(); - } else if (((size_t)batch_byte_size) > input_data->size()) { - return Status( - Status::Code::INVALID_ARG, - lrequest->LogRequest() + "warmup setting expects " + - std::to_string(batch_byte_size) + - " bytes, but the data " - "provided from " + - input_meta.second.input_data_file() + "only has " + - std::to_string(input_data->size()) + " bytes"); - } - allocated_ptr = input_data->data(); - break; - } - default: - return Status( - Status::Code::INVALID_ARG, - lrequest->LogRequest() + "warmup setting expects input '" + - input_meta.first + "' to have input_data_type set"); - } - - const inference::ModelInput* input_config; - bool is_original_input = - model_->GetInput(input_meta.first, &input_config).IsOk(); - InferenceRequest::Input* input = nullptr; - std::vector input_meta_shape; - // Append batch size only if the model supports batching - // and not control inpt. - if ((model_->Config().max_batch_size() != 0) && is_original_input) { - input_meta_shape.push_back(1); - } - for (auto d : input_meta.second.dims()) { - input_meta_shape.push_back(d); - } - if (is_original_input) { - RETURN_IF_ERROR(lrequest->AddOriginalInput( - input_meta.first, input_meta.second.data_type(), input_meta_shape, - &input)); - } else { - input_sps.emplace_back(); - RETURN_IF_ERROR(lrequest->AddOverrideInput( - input_meta.first, input_meta.second.data_type(), - (model_->Config().max_batch_size() != 0 ? 1 : 0), - input_meta_shape, &input_sps.back())); - input = input_sps.back().get(); - } - RETURN_IF_ERROR(input->AppendData( - allocated_ptr, batch_byte_size, - TRITONSERVER_MEMORY_CPU /* memory_type */, 0 /* memory_type_id */)); - } - - RETURN_IF_ERROR(lrequest->PrepareForInference()); - // Override inputs must be added after PrepareForInference() is called - for (const auto& sp : input_sps) { - RETURN_IF_ERROR(lrequest->AddOverrideInput(sp)); - } - } - } - - return Status::Success; -} - -void -TritonModelInstance::Schedule( - std::vector>&& requests, - const std::function& OnCompletion) -{ - // Use a thread local vector to avoid needing to malloc each - // time an inference is run. 
- thread_local std::vector triton_requests(1024); - triton_requests.clear(); - for (auto& r : requests) { - // Load the input states for the inference request. - r->LoadInputStates(); - triton_requests.push_back( - reinterpret_cast(r.release())); - } - - Execute(triton_requests); - - OnCompletion(); -} - -Status -TritonModelInstance::Initialize() -{ - RETURN_IF_ERROR(SetNumaConfigOnThread(HostPolicy())); - return Status::Success; -} - -Status -TritonModelInstance::WarmUp() -{ - // move samples to local variable for scoped cleanup - std::vector lwarmup_samples; - lwarmup_samples.swap(warmup_samples_); - - for (auto& sample : lwarmup_samples) { - for (size_t iteration = 1; iteration <= sample.count_; ++iteration) { - LOG_VERBOSE(1) << "model '" << sample.requests_.back()->ModelName() - << "' instance " << Name() << " is running warmup sample '" - << sample.sample_name_ << "' for iteration " << iteration; - - // request/response complete is asynchronous so use promise to wait for - // completion. Also collects error message from the responses in a vector. - std::vector> request_complete(sample.requests_.size()); - std::vector response_errors; - std::vector, std::vector*>> - response_complete(sample.requests_.size()); - - std::vector triton_requests; - for (size_t i = 0; i < sample.requests_.size(); ++i) { - auto& request = sample.requests_[i]; - request->SetReleaseCallback( - WarmupRequestComplete, &request_complete[i]); - response_complete[i].second = &response_errors; - request->SetResponseCallback( - &warmup_allocator, nullptr, WarmupResponseComplete, - &response_complete[i]); - // Capture timestamp before run to avoid incorrect accumulation from - // sequential warmup runs -#ifdef TRITON_ENABLE_STATS - request->CaptureRequestStartNs(); -#endif // TRITON_ENABLE_STATS - request->CaptureQueueStartNs(); - triton_requests.push_back( - reinterpret_cast(request.get())); - } - - Execute(triton_requests); - - // Wait for warmup sample to complete and check error - for (size_t i = 0; i < sample.requests_.size(); ++i) { - request_complete[i].get_future().get(); - response_complete[i].first.get_future().get(); - } - if (response_errors.size() != 0) { - std::string err_str = - "failed to run warmup sample '" + sample.sample_name_ + "': "; - for (const auto& error : response_errors) { - err_str += (error + "; "); - } - // End warmup as soon as there is failing sample - LOG_VERBOSE(1) << "model '" << sample.requests_.back()->ModelName() - << "' instance " << Name() - << " failed to run warmup sample '" - << sample.sample_name_ << "'"; - return Status(Status::Code::INVALID_ARG, err_str); - } - } - } - - return Status::Success; -} - -void -TritonModelInstance::Execute( - std::vector& triton_requests) -{ - TRITONBACKEND_ModelInstance* triton_model_instance = - reinterpret_cast(this); - TritonBackend::TritonModelInstanceExecFn_t inst_exec_fn = - model_->Backend()->ModelInstanceExecFn(); - - // If there is an error then we retain ownership of 'requests' - // and must send error responses. 
- TRITONSERVER_Error* err = inst_exec_fn( - triton_model_instance, &triton_requests[0], triton_requests.size()); - if (err != nullptr) { - Status status = Status( - TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)), - TRITONSERVER_ErrorMessage(err)); - for (TRITONBACKEND_Request* tr : triton_requests) { - std::unique_ptr ur( - reinterpret_cast(tr)); - InferenceRequest::RespondIfError(ur, status, true /* release_requests */); - } - - TRITONSERVER_ErrorDelete(err); - } -} - -Status -TritonModelInstance::TritonBackendThread::CreateBackendThread( - const std::string name, TritonModelInstance* model_instance, const int nice, - const int32_t device_id, - std::unique_ptr* triton_backend_thread) -{ - TritonBackendThread* raw_triton_backend_thread = - new TritonBackendThread(name, model_instance->Model()); - std::unique_ptr runner(raw_triton_backend_thread); - - runner->AddModelInstance(model_instance); - runner->backend_thread_ = - std::thread([raw_triton_backend_thread, nice, device_id]() { - raw_triton_backend_thread->BackendThread(nice, device_id); - }); - - triton_backend_thread->reset(runner.release()); - - return Status::Success; -} - -void -TritonModelInstance::TritonBackendThread::AddModelInstance( - TritonModelInstance* model_instance) -{ - model_instances_.push_back(model_instance); -} - -Status -TritonModelInstance::TritonBackendThread::InitAndWarmUpModelInstance( - TritonModelInstance* model_instance) -{ - // Initialize the instance on the backend thread - auto init_payload = model_->Server()->GetRateLimiter()->GetPayload( - Payload::Operation::INIT, model_instance); - RETURN_IF_ERROR( - model_->Server()->GetRateLimiter()->EnqueuePayload(model_, init_payload)); - RETURN_IF_ERROR(init_payload->Wait()); - - // Warm-up the instance on the backend thread - auto warmup_payload = model_->Server()->GetRateLimiter()->GetPayload( - Payload::Operation::WARM_UP, model_instance); - RETURN_IF_ERROR(model_->Server()->GetRateLimiter()->EnqueuePayload( - model_, warmup_payload)); - RETURN_IF_ERROR(warmup_payload->Wait()); - - return Status::Success; -} - -TritonModelInstance::TritonBackendThread::TritonBackendThread( - const std::string& name, TritonModel* model) - : name_(name), model_(model) -{ -} - -TritonModelInstance::TritonBackendThread::~TritonBackendThread() -{ - StopBackendThread(); -} - -void -TritonModelInstance::TritonBackendThread::StopBackendThread() -{ - if (backend_thread_.joinable()) { - // Signal the backend thread to exit and then wait for it... 
- auto exit_payload = model_->Server()->GetRateLimiter()->GetPayload( - Payload::Operation::EXIT, model_instances_.back()); - model_->Server()->GetRateLimiter()->EnqueuePayload(model_, exit_payload); - backend_thread_.join(); - } -} - -void -TritonModelInstance::TritonBackendThread::BackendThread( - const int nice, const int32_t device_id) -{ -#ifndef _WIN32 - if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) { - LOG_VERBOSE(1) << "Starting backend thread for " << name_ << " at nice " - << nice << " on device " << device_id << "..."; - } else { - LOG_VERBOSE(1) << "Starting backend thread for " << name_ - << " at default nice (requested nice " << nice << " failed)" - << " on device " << device_id << "..."; - } -#else - LOG_VERBOSE(1) << "Starting backend thread for " << name_ - << " at default nice on device " << device_id << "..."; -#endif - - bool should_exit = false; - while (!should_exit) { - std::shared_ptr payload; - model_->Server()->GetRateLimiter()->DequeuePayload( - model_instances_, &payload); - NVTX_RANGE(nvtx_, "BackendThread " + name_); - payload->Execute(&should_exit); - model_instances_.push_back(payload->GetInstance()); - // Release the payload to the RateLimiter - model_->Server()->GetRateLimiter()->PayloadRelease(payload); - } - LOG_VERBOSE(1) << "Stopping backend thread for " << name_ << "..."; -} - -extern "C" { - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceName( - TRITONBACKEND_ModelInstance* instance, const char** name) -{ - TritonModelInstance* ti = reinterpret_cast(instance); - *name = ti->Name().c_str(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceKind( - TRITONBACKEND_ModelInstance* instance, TRITONSERVER_InstanceGroupKind* kind) -{ - TritonModelInstance* ti = reinterpret_cast(instance); - *kind = ti->Kind(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceDeviceId( - TRITONBACKEND_ModelInstance* instance, int32_t* device_id) -{ - TritonModelInstance* ti = reinterpret_cast(instance); - *device_id = ti->DeviceId(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceHostPolicy( - TRITONBACKEND_ModelInstance* instance, TRITONSERVER_Message** host_policy) -{ - TritonModelInstance* ti = reinterpret_cast(instance); - *host_policy = const_cast( - reinterpret_cast(&ti->HostPolicyMessage())); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceProfileCount( - TRITONBACKEND_ModelInstance* instance, uint32_t* count) -{ - TritonModelInstance* ti = reinterpret_cast(instance); - *count = ti->Profiles().size(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceProfileName( - TRITONBACKEND_ModelInstance* instance, const uint32_t index, - const char** profile_name) -{ - *profile_name = nullptr; - - TritonModelInstance* ti = reinterpret_cast(instance); - const auto& rprofiles = ti->Profiles(); - if (index >= rprofiles.size()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("out of bounds index ") + std::to_string(index) + - ": instance is configured with " + std::to_string(rprofiles.size()) + - " profiles") - .c_str()); - } - - *profile_name = rprofiles[index].c_str(); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONBACKEND_ModelInstanceSecondaryDeviceCount( - TRITONBACKEND_ModelInstance* instance, uint32_t* count) 
-{
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  *count = ti->SecondaryDevices().size();
-
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
-    TRITONBACKEND_ModelInstance* instance, uint32_t index, const char** kind,
-    int64_t* id)
-{
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  const auto& rsecondarydevices = ti->SecondaryDevices();
-
-  if (index >= rsecondarydevices.size()) {
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_INVALID_ARG,
-        (std::string("out of bounds index ") + std::to_string(index) +
-         ": instance is configured with " +
-         std::to_string(rsecondarydevices.size()) + " secondary devices")
-            .c_str());
-  }
-
-  *kind = rsecondarydevices[index].kind_.c_str();
-  *id = rsecondarydevices[index].id_;
-
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceIsPassive(
-    TRITONBACKEND_ModelInstance* instance, bool* is_passive)
-{
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  *is_passive = ti->IsPassive();
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceModel(
-    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Model** model)
-{
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  *model = reinterpret_cast<TRITONBACKEND_Model*>(ti->Model());
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceState(
-    TRITONBACKEND_ModelInstance* instance, void** state)
-{
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  *state = ti->State();
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceSetState(
-    TRITONBACKEND_ModelInstance* instance, void* state)
-{
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  ti->SetState(state);
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceReportStatistics(
-    TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request* request,
-    const bool success, const uint64_t exec_start_ns,
-    const uint64_t compute_start_ns, const uint64_t compute_end_ns,
-    const uint64_t exec_end_ns)
-{
-#ifdef TRITON_ENABLE_STATS
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  InferenceRequest* tr = reinterpret_cast<InferenceRequest*>(request);
-  tr->ReportStatistics(
-      ti->MetricReporter(), success, exec_start_ns, compute_start_ns,
-      compute_end_ns, exec_end_ns);
-#endif  // TRITON_ENABLE_STATS
-
-  return nullptr;  // success
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONBACKEND_ModelInstanceReportBatchStatistics(
-    TRITONBACKEND_ModelInstance* instance, const uint64_t batch_size,
-    const uint64_t exec_start_ns, const uint64_t compute_start_ns,
-    const uint64_t compute_end_ns, const uint64_t exec_end_ns)
-{
-#ifdef TRITON_ENABLE_STATS
-  TritonModelInstance* ti = reinterpret_cast<TritonModelInstance*>(instance);
-  ti->Model()->MutableStatsAggregator()->UpdateInferBatchStats(
-      ti->MetricReporter(), batch_size, exec_start_ns, compute_start_ns,
-      compute_end_ns, exec_end_ns);
-#endif  // TRITON_ENABLE_STATS
-
-  return nullptr;  // success
-}
-
-}  // extern C
-}}  // namespace triton::core
diff --git a/3rdparty/core-r22.12/src/backend_model_instance.h b/3rdparty/core-r22.12/src/backend_model_instance.h
deleted file mode 100644
index aa8ae94045c2b287c47c14cab30ec7013c01ebcf..0000000000000000000000000000000000000000
--- a/3rdparty/core-r22.12/src/backend_model_instance.h
+++ /dev/null
@@ -1,200 +0,0 @@
-// Copyright 2020-2022, NVIDIA CORPORATION &
AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include "constants.h" -#include "memory.h" -#include "metric_model_reporter.h" -#include "model_config.pb.h" -#include "server_message.h" -#include "status.h" -#include "triton/common/sync_queue.h" - -namespace triton { namespace core { - -class TritonModel; -class InferenceRequest; - -// -// Represents a model instance. 
-// -class TritonModelInstance { - public: - static Status CreateInstances( - TritonModel* model, - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map, - const inference::ModelConfig& model_config, const bool device_blocking); - ~TritonModelInstance(); - - const std::string& Name() const { return name_; } - size_t Index() const { return index_; } - TRITONSERVER_InstanceGroupKind Kind() const { return kind_; } - int32_t DeviceId() const { return device_id_; } - const triton::common::HostPolicyCmdlineConfig& HostPolicy() const - { - return host_policy_; - } - const TritonServerMessage& HostPolicyMessage() const - { - return host_policy_message_; - } - bool IsPassive() const { return passive_; } - const std::vector& Profiles() const { return profile_names_; } - - struct SecondaryDevice { - SecondaryDevice(const std::string kind, const int64_t id) - : kind_(kind), id_(id) - { - } - const std::string kind_; - const int64_t id_; - }; - const std::vector& SecondaryDevices() const - { - return secondary_devices_; - } - - Status Initialize(); - Status WarmUp(); - void Schedule( - std::vector>&& requests, - const std::function& OnCompletion); - - TritonModel* Model() const { return model_; } - void* State() { return state_; } - void SetState(void* state) { state_ = state; } - - MetricModelReporter* MetricReporter() const { return reporter_.get(); } - - private: - DISALLOW_COPY_AND_ASSIGN(TritonModelInstance); - class TritonBackendThread; - TritonModelInstance( - TritonModel* model, const std::string& name, const size_t index, - const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id, - const std::vector& profile_names, const bool passive, - const triton::common::HostPolicyCmdlineConfig& host_policy, - const TritonServerMessage& host_policy_message, - const std::vector& secondary_devices); - static Status CreateInstance( - TritonModel* model, const std::string& name, const size_t index, - const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id, - const std::vector& profile_names, const bool passive, - const std::string& host_policy_name, - const triton::common::HostPolicyCmdlineConfig& host_policy, - const inference::ModelRateLimiter& rate_limiter_config, - const bool device_blocking, - std::map>* - device_to_thread_map, - const std::vector& secondary_devices); - Status SetBackendThread( - const TRITONSERVER_InstanceGroupKind kind, const int32_t device_id, - const bool device_blocking, - std::map>* - device_to_thread_map); - Status GenerateWarmupData(); - - void Execute(std::vector& triton_requests); - - class TritonBackendThread { - public: - static Status CreateBackendThread( - const std::string name, TritonModelInstance* model, const int nice, - const int32_t device_id, - std::unique_ptr* triton_backend_thread); - void AddModelInstance(TritonModelInstance* model_instance); - Status InitAndWarmUpModelInstance(TritonModelInstance* model_instance); - void StopBackendThread(); - ~TritonBackendThread(); - - private: - TritonBackendThread(const std::string& name, TritonModel* model); - void BackendThread(const int nice, const int32_t device_id); - - std::string name_; - - TritonModel* model_; - std::deque model_instances_; - - std::thread backend_thread_; - std::atomic backend_thread_exit_; - }; - std::shared_ptr triton_backend_thread_; - - struct WarmupData { - WarmupData(const std::string& sample_name, const size_t count) - : sample_name_(sample_name), count_(std::max(count, size_t{1})) - 
{ - } - - std::string sample_name_; - size_t count_; - // Using a batch of requests to satisfy batch size, this provides better - // alignment on the batch expected by the model, especially for sequence - // model. - std::vector> requests_; - - // Placeholder for input data - std::unique_ptr zero_data_; - std::unique_ptr random_data_; - std::vector> provided_data_; - }; - std::vector warmup_samples_; - - // The TritonModel object that owns this instance. The instance - // holds this as a raw pointer because the lifetime of the model is - // guaranteed to be longer than the lifetime of an instance owned by the - // model. - TritonModel* model_; - - std::string name_; - size_t index_; - - // For CPU device_id_ is always 0. For GPU device_id_ indicates the - // GPU device to be used by the instance. - TRITONSERVER_InstanceGroupKind kind_; - int32_t device_id_; - const triton::common::HostPolicyCmdlineConfig host_policy_; - TritonServerMessage host_policy_message_; - std::vector profile_names_; - bool passive_; - - std::vector secondary_devices_; - - // Reporter for metrics, or nullptr if no metrics should be reported - std::shared_ptr reporter_; - - // Opaque state associated with this model instance. - void* state_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/buffer_attributes.cc b/3rdparty/core-r22.12/src/buffer_attributes.cc deleted file mode 100644 index d184662bd6cbd0fea672b548af30724f89382729..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/buffer_attributes.cc +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-
-#include "buffer_attributes.h"
-
-#include
-#include "constants.h"
-
-namespace triton { namespace core {
-void
-BufferAttributes::SetByteSize(const size_t& byte_size)
-{
-  byte_size_ = byte_size;
-}
-
-void
-BufferAttributes::SetMemoryType(const TRITONSERVER_MemoryType& memory_type)
-{
-  memory_type_ = memory_type;
-}
-
-void
-BufferAttributes::SetMemoryTypeId(const int64_t& memory_type_id)
-{
-  memory_type_id_ = memory_type_id;
-}
-
-void
-BufferAttributes::SetCudaIpcHandle(void* cuda_ipc_handle)
-{
-  char* lcuda_ipc_handle = reinterpret_cast<char*>(cuda_ipc_handle);
-  cuda_ipc_handle_.clear();
-  std::copy(
-      lcuda_ipc_handle, lcuda_ipc_handle + CUDA_IPC_STRUCT_SIZE,
-      std::back_inserter(cuda_ipc_handle_));
-}
-
-void*
-BufferAttributes::CudaIpcHandle()
-{
-  if (cuda_ipc_handle_.empty()) {
-    return nullptr;
-  } else {
-    return reinterpret_cast<void*>(cuda_ipc_handle_.data());
-  }
-}
-
-size_t
-BufferAttributes::ByteSize() const
-{
-  return byte_size_;
-}
-
-TRITONSERVER_MemoryType
-BufferAttributes::MemoryType() const
-{
-  return memory_type_;
-}
-
-int64_t
-BufferAttributes::MemoryTypeId() const
-{
-  return memory_type_id_;
-}
-
-BufferAttributes::BufferAttributes(
-    size_t byte_size, TRITONSERVER_MemoryType memory_type,
-    int64_t memory_type_id, char* cuda_ipc_handle)
-    : byte_size_(byte_size), memory_type_(memory_type),
-      memory_type_id_(memory_type_id)
-{
-  // cuda ipc handle size
-  cuda_ipc_handle_.reserve(CUDA_IPC_STRUCT_SIZE);
-
-  if (cuda_ipc_handle != nullptr) {
-    std::copy(
-        cuda_ipc_handle, cuda_ipc_handle + CUDA_IPC_STRUCT_SIZE,
-        std::back_inserter(cuda_ipc_handle_));
-  }
-}
-}}  // namespace triton::core
diff --git a/3rdparty/core-r22.12/src/buffer_attributes.h b/3rdparty/core-r22.12/src/buffer_attributes.h
deleted file mode 100644
index aa89b3913403379946fe69cc8587638d355749c5..0000000000000000000000000000000000000000
--- a/3rdparty/core-r22.12/src/buffer_attributes.h
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- -#include -#include -#include "tritonserver_apis.h" - -#pragma once - -namespace triton { namespace core { -// -// A class to hold information about the buffer allocation. -// -class BufferAttributes { - public: - BufferAttributes( - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, char cuda_ipc_handle[64]); - BufferAttributes() - { - memory_type_ = TRITONSERVER_MEMORY_CPU; - memory_type_id_ = 0; - cuda_ipc_handle_.reserve(64); - } - - // Set the buffer byte size - void SetByteSize(const size_t& byte_size); - - // Set the buffer memory_type - void SetMemoryType(const TRITONSERVER_MemoryType& memory_type); - - // Set the buffer memory type id - void SetMemoryTypeId(const int64_t& memory_type_id); - - // Set the cuda ipc handle - void SetCudaIpcHandle(void* cuda_ipc_handle); - - // Get the cuda ipc handle - void* CudaIpcHandle(); - - // Get the byte size - size_t ByteSize() const; - - // Get the memory type - TRITONSERVER_MemoryType MemoryType() const; - - // Get the memory type id - int64_t MemoryTypeId() const; - - private: - size_t byte_size_; - TRITONSERVER_MemoryType memory_type_; - int64_t memory_type_id_; - std::vector cuda_ipc_handle_; -}; -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/constants.h b/3rdparty/core-r22.12/src/constants.h deleted file mode 100644 index 40d0705586774357e04a9a77983862ab0c3a44f8..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/constants.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#pragma once - -#include - -namespace triton { namespace core { - -constexpr char kInferHeaderContentLengthHTTPHeader[] = - "Inference-Header-Content-Length"; -constexpr char kAcceptEncodingHTTPHeader[] = "Accept-Encoding"; -constexpr char kContentEncodingHTTPHeader[] = "Content-Encoding"; -constexpr char kContentTypeHeader[] = "Content-Type"; -constexpr char kContentLengthHeader[] = "Content-Length"; - -constexpr char kTensorFlowGraphDefPlatform[] = "tensorflow_graphdef"; -constexpr char kTensorFlowSavedModelPlatform[] = "tensorflow_savedmodel"; -constexpr char kTensorFlowGraphDefFilename[] = "model.graphdef"; -constexpr char kTensorFlowSavedModelFilename[] = "model.savedmodel"; -constexpr char kTensorFlowBackend[] = "tensorflow"; - -constexpr char kTensorRTPlanPlatform[] = "tensorrt_plan"; -constexpr char kTensorRTPlanFilename[] = "model.plan"; -constexpr char kTensorRTBackend[] = "tensorrt"; - -constexpr char kOnnxRuntimeOnnxPlatform[] = "onnxruntime_onnx"; -constexpr char kOnnxRuntimeOnnxFilename[] = "model.onnx"; -constexpr char kOnnxRuntimeBackend[] = "onnxruntime"; - -constexpr char kOpenVINORuntimeOpenVINOFilename[] = "model.xml"; -constexpr char kOpenVINORuntimeBackend[] = "openvino"; - -constexpr char kPyTorchLibTorchPlatform[] = "pytorch_libtorch"; -constexpr char kPyTorchLibTorchFilename[] = "model.pt"; -constexpr char kPyTorchBackend[] = "pytorch"; - -constexpr char kPythonFilename[] = "model.py"; -constexpr char kPythonBackend[] = "python"; - -#ifdef TRITON_ENABLE_ENSEMBLE -constexpr char kEnsemblePlatform[] = "ensemble"; -#endif // TRITON_ENABLE_ENSEMBLE - -constexpr char kTensorRTExecutionAccelerator[] = "tensorrt"; -constexpr char kOpenVINOExecutionAccelerator[] = "openvino"; -constexpr char kGPUIOExecutionAccelerator[] = "gpu_io"; -constexpr char kAutoMixedPrecisionExecutionAccelerator[] = - "auto_mixed_precision"; - -constexpr char kModelConfigPbTxt[] = "config.pbtxt"; - -constexpr char kMetricsLabelModelName[] = "model"; -constexpr char kMetricsLabelModelVersion[] = "version"; -constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid"; - -constexpr char kWarmupDataFolder[] = "warmup"; -constexpr char kInitialStateFolder[] = "initial_state"; - -constexpr uint64_t NANOS_PER_SECOND = 1000000000; -constexpr uint64_t NANOS_PER_MILLIS = 1000000; -constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX; -constexpr uint64_t SEQUENCE_IDLE_DEFAULT_MICROSECONDS = 1000 * 1000; -constexpr size_t STRING_CORRELATION_ID_MAX_LENGTH_BYTES = 128; -constexpr size_t CUDA_IPC_STRUCT_SIZE = 64; - -#ifdef TRITON_ENABLE_METRICS -// MetricModelReporter expects a device ID for GPUs, but we reuse this device -// ID for other metrics as well such as for CPU and Response Cache metrics -constexpr int METRIC_REPORTER_ID_CPU = -1; -constexpr int METRIC_REPORTER_ID_RESPONSE_CACHE = -2; -#endif - -#define TIMESPEC_TO_NANOS(TS) \ - ((TS).tv_sec * triton::core::NANOS_PER_SECOND + (TS).tv_nsec) -#define TIMESPEC_TO_MILLIS(TS) \ - (TIMESPEC_TO_NANOS(TS) / triton::core::NANOS_PER_MILLIS) - -#define DISALLOW_MOVE(TypeName) TypeName(Context&& o) = delete; -#define DISALLOW_COPY(TypeName) TypeName(const TypeName&) = delete; -#define DISALLOW_ASSIGN(TypeName) void operator=(const TypeName&) = delete; -#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - DISALLOW_COPY(TypeName) \ - DISALLOW_ASSIGN(TypeName) - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/cuda_memory_manager.cc b/3rdparty/core-r22.12/src/cuda_memory_manager.cc deleted file mode 100644 index 
eec9206c1f8f8f814b7a5aa16860d2cf6addb3ec..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/cuda_memory_manager.cc +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#include "cuda_memory_manager.h" - -#include -#include -#include -#include "cuda_utils.h" -#include "triton/common/logging.h" - -namespace { - -#define RETURN_IF_CNMEM_ERROR(S, MSG) \ - do { \ - auto status__ = (S); \ - if (status__ != CNMEM_STATUS_SUCCESS) { \ - return Status( \ - Status::Code::INTERNAL, \ - (MSG) + ": " + cnmemGetErrorString(status__)); \ - } \ - } while (false) - -std::string -PointerToString(void* ptr) -{ - std::stringstream ss; - ss << ptr; - return ss.str(); -} - -} // namespace - -namespace triton { namespace core { - -std::unique_ptr CudaMemoryManager::instance_; -std::mutex CudaMemoryManager::instance_mu_; - -CudaMemoryManager::~CudaMemoryManager() -{ - if (has_allocation_) { - auto status = cnmemFinalize(); - if (status != CNMEM_STATUS_SUCCESS) { - LOG_ERROR << "Failed to finalize CUDA memory manager: [" << status << "] " - << cnmemGetErrorString(status); - } - } -} - -void -CudaMemoryManager::Reset() -{ - std::lock_guard lock(instance_mu_); - instance_.reset(); -} - -Status -CudaMemoryManager::Create(const CudaMemoryManager::Options& options) -{ - // Ensure thread-safe creation of CUDA memory pool - std::lock_guard lock(instance_mu_); - if (instance_ != nullptr) { - LOG_WARNING << "New CUDA memory pools could not be created since they " - "already exists"; - return Status::Success; - } - - std::set supported_gpus; - auto status = GetSupportedGPUs( - &supported_gpus, options.min_supported_compute_capability_); - if (status.IsOk()) { - std::vector devices; - for (auto gpu : supported_gpus) { - const auto it = options.memory_pool_byte_size_.find(gpu); - if ((it != options.memory_pool_byte_size_.end()) && (it->second != 0)) { - devices.emplace_back(); - auto& device = devices.back(); - memset(&device, 0, sizeof(device)); - device.device = gpu; - 
device.size = it->second; - - LOG_INFO << "CUDA memory pool is created on device " << device.device - << " with size " << device.size; - } - } - - if (!devices.empty()) { - RETURN_IF_CNMEM_ERROR( - cnmemInit(devices.size(), devices.data(), CNMEM_FLAGS_CANNOT_GROW), - std::string("Failed to finalize CUDA memory manager")); - } else { - LOG_INFO << "CUDA memory pool disabled"; - } - - // Use to finalize CNMeM properly when out of scope - instance_.reset(new CudaMemoryManager(!devices.empty())); - } else { - return Status( - status.ErrorCode(), - "Failed to initialize CUDA memory manager: " + status.Message()); - } - - return Status::Success; -} - -Status -CudaMemoryManager::Alloc(void** ptr, uint64_t size, int64_t device_id) -{ - if (instance_ == nullptr) { - return Status( - Status::Code::UNAVAILABLE, "CudaMemoryManager has not been created"); - } else if (!instance_->has_allocation_) { - return Status( - Status::Code::UNAVAILABLE, - "CudaMemoryManager has no preallocated CUDA memory"); - } - - int current_device; - RETURN_IF_CUDA_ERR( - cudaGetDevice(¤t_device), std::string("Failed to get device")); - bool overridden = (current_device != device_id); - if (overridden) { - RETURN_IF_CUDA_ERR( - cudaSetDevice(device_id), std::string("Failed to set device")); - } - - // Defer returning error to make sure the device is recovered - auto err = cnmemMalloc(ptr, size, nullptr); - - if (overridden) { - cudaSetDevice(current_device); - } - - RETURN_IF_CNMEM_ERROR( - err, std::string("Failed to allocate CUDA memory with byte size ") + - std::to_string(size) + " on GPU " + std::to_string(device_id)); - return Status::Success; -} - -Status -CudaMemoryManager::Free(void* ptr, int64_t device_id) -{ - if (instance_ == nullptr) { - return Status( - Status::Code::UNAVAILABLE, "CudaMemoryManager has not been created"); - } else if (!instance_->has_allocation_) { - return Status( - Status::Code::UNAVAILABLE, - "CudaMemoryManager has no preallocated CUDA memory"); - } - - int current_device; - RETURN_IF_CUDA_ERR( - cudaGetDevice(¤t_device), std::string("Failed to get device")); - bool overridden = (current_device != device_id); - if (overridden) { - RETURN_IF_CUDA_ERR( - cudaSetDevice(device_id), std::string("Failed to set device")); - } - - // Defer returning error to make sure the device is recovered - auto err = cnmemFree(ptr, nullptr); - - if (overridden) { - cudaSetDevice(current_device); - } - - RETURN_IF_CNMEM_ERROR( - err, std::string("Failed to deallocate CUDA memory at address ") + - PointerToString(ptr) + " on GPU " + std::to_string(device_id)); - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/cuda_memory_manager.h b/3rdparty/core-r22.12/src/cuda_memory_manager.h deleted file mode 100644 index cc06d8ca1d3b57ea5cff8c735b27da8e54c248fb..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/cuda_memory_manager.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#pragma once - -#include -#include -#include -#include "status.h" - -namespace triton { namespace core { - -// This is a singleton class responsible for maintaining CUDA memory pool -// used by the inference server. CUDA memory allocations and deallocations -// must be requested via functions provided by this class. -class CudaMemoryManager { - public: - // Options to configure CUDA memory manager. - struct Options { - Options(double cc = 6.0, const std::map& s = {}) - : min_supported_compute_capability_(cc), memory_pool_byte_size_(s) - { - } - - // The minimum compute capability of the supported devices. - double min_supported_compute_capability_; - - // The size of CUDA memory reserved for the specified devices. - // The memory size will be rounded up to align with - // the default granularity (512 bytes). - // No memory will be reserved for devices that is not listed. - std::map memory_pool_byte_size_; - }; - - ~CudaMemoryManager(); - - // Create the memory manager based on 'options' specified. - // Return Status object indicating success or failure. - static Status Create(const Options& options); - - // Allocate CUDA memory on GPU 'device_id' with - // the requested 'size' and return the pointer in 'ptr'. - // Return Status object indicating success or failure. - static Status Alloc(void** ptr, uint64_t size, int64_t device_id); - - // Free the memory allocated by the memory manager on 'device_id'. - // Return Status object indicating success or failure. - static Status Free(void* ptr, int64_t device_id); - - protected: - // Provide explicit control on the lifecycle of the CUDA memory manager, - // for testing only. - static void Reset(); - - private: - CudaMemoryManager(bool has_allocation) : has_allocation_(has_allocation) {} - bool has_allocation_; - static std::unique_ptr instance_; - static std::mutex instance_mu_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/cuda_utils.cc b/3rdparty/core-r22.12/src/cuda_utils.cc deleted file mode 100644 index e758c3a8a71bd4ebf0d6d69b5176995a1f81e69c..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/cuda_utils.cc +++ /dev/null @@ -1,263 +0,0 @@ -// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "cuda_utils.h" - -#include "model_config_utils.h" -#include "triton/common/nvtx.h" - -namespace triton { namespace core { - -#ifdef TRITON_ENABLE_GPU -void CUDART_CB -MemcpyHost(void* args) -{ - auto* copy_params = reinterpret_cast(args); - memcpy(copy_params->dst_, copy_params->src_, copy_params->byte_size_); - delete copy_params; -} -#endif // TRITON_ENABLE_GPU - -Status -GetDeviceMemoryInfo(const int device_id, size_t* free, size_t* total) -{ - *free = 0; - *total = 0; -#ifdef TRITON_ENABLE_GPU - // Make sure that correct device is set before creating stream and - // then restore the device to what was set by the caller. - int current_device; - auto cuerr = cudaGetDevice(¤t_device); - bool overridden = false; - if (cuerr == cudaSuccess) { - overridden = (current_device != device_id); - if (overridden) { - cuerr = cudaSetDevice(device_id); - } - } - - if (cuerr == cudaSuccess) { - cuerr = cudaMemGetInfo(free, total); - } - - if (overridden) { - cudaSetDevice(current_device); - } - - if (cuerr != cudaSuccess) { - return Status( - Status::Code::INTERNAL, - (std::string("unable to get memory info for device ") + - std::to_string(device_id) + ": " + cudaGetErrorString(cuerr))); - } -#endif // TRITON_ENABLE_GPU - return Status::Success; -} - -Status -EnablePeerAccess(const double min_compute_capability) -{ -#ifdef TRITON_ENABLE_GPU - // If we can't enable peer access for one device pair, the best we can - // do is skipping it... 
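The deleted CUDA helpers repeatedly save the caller's device, switch to the target device, and restore it on exit (GetDeviceMemoryInfo here, Alloc/Free in the memory manager above). A minimal sketch of that pattern as an RAII guard; `ScopedSetDevice` is a hypothetical name, not a Triton type:

```cpp
// Minimal sketch (not part of the Triton sources) of the save/set/restore
// device pattern used by GetDeviceMemoryInfo(), Alloc() and Free().
// "ScopedSetDevice" is a hypothetical helper chosen for illustration.
#include <cuda_runtime_api.h>

class ScopedSetDevice {
 public:
  explicit ScopedSetDevice(int device_id) : restore_(false), previous_(0)
  {
    if (cudaGetDevice(&previous_) == cudaSuccess && previous_ != device_id) {
      restore_ = (cudaSetDevice(device_id) == cudaSuccess);
    }
  }

  ~ScopedSetDevice()
  {
    // Restore the caller's device, mirroring the explicit
    // "if (overridden) cudaSetDevice(current_device)" calls in the file.
    if (restore_) {
      cudaSetDevice(previous_);
    }
  }

 private:
  bool restore_;
  int previous_;
};
```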
- std::set supported_gpus; - bool all_enabled = false; - if (GetSupportedGPUs(&supported_gpus, min_compute_capability).IsOk()) { - all_enabled = true; - int can_access_peer = false; - for (const auto& host : supported_gpus) { - auto cuerr = cudaSetDevice(host); - - if (cuerr == cudaSuccess) { - for (const auto& peer : supported_gpus) { - if (host == peer) { - continue; - } - - cuerr = cudaDeviceCanAccessPeer(&can_access_peer, host, peer); - if ((cuerr == cudaSuccess) && (can_access_peer == 1)) { - cuerr = cudaDeviceEnablePeerAccess(peer, 0); - } - - all_enabled &= ((cuerr == cudaSuccess) && (can_access_peer == 1)); - } - } - } - } - if (!all_enabled) { - return Status( - Status::Code::UNSUPPORTED, - "failed to enable peer access for some device pairs"); - } -#endif // TRITON_ENABLE_GPU - return Status::Success; -} - -Status -CopyBuffer( - const std::string& msg, const TRITONSERVER_MemoryType src_memory_type, - const int64_t src_memory_type_id, - const TRITONSERVER_MemoryType dst_memory_type, - const int64_t dst_memory_type_id, const size_t byte_size, const void* src, - void* dst, cudaStream_t cuda_stream, bool* cuda_used, bool copy_on_stream) -{ - NVTX_RANGE(nvtx_, "CopyBuffer"); - - *cuda_used = false; - - // For CUDA memcpy, all host to host copy will be blocked in respect to the - // host, so use memcpy() directly. In this case, need to be careful on whether - // the src buffer is valid. - if ((src_memory_type != TRITONSERVER_MEMORY_GPU) && - (dst_memory_type != TRITONSERVER_MEMORY_GPU)) { -#ifdef TRITON_ENABLE_GPU - if (copy_on_stream) { - auto params = new CopyParams(dst, src, byte_size); - cudaLaunchHostFunc( - cuda_stream, MemcpyHost, reinterpret_cast(params)); - *cuda_used = true; - } else { - memcpy(dst, src, byte_size); - } -#else - memcpy(dst, src, byte_size); -#endif // TRITON_ENABLE_GPU - } else { -#ifdef TRITON_ENABLE_GPU - RETURN_IF_CUDA_ERR( - cudaMemcpyAsync(dst, src, byte_size, cudaMemcpyDefault, cuda_stream), - msg + ": failed to perform CUDA copy"); - - *cuda_used = true; -#else - return Status( - Status::Code::INTERNAL, - msg + ": try to use CUDA copy while GPU is not supported"); -#endif // TRITON_ENABLE_GPU - } - - return Status::Success; -} - -void -CopyBufferHandler( - const std::string& msg, const TRITONSERVER_MemoryType src_memory_type, - const int64_t src_memory_type_id, - const TRITONSERVER_MemoryType dst_memory_type, - const int64_t dst_memory_type_id, const size_t byte_size, const void* src, - void* dst, cudaStream_t cuda_stream, void* response_ptr, - triton::common::SyncQueue>* - completion_queue) -{ - bool cuda_used = false; - Status status = CopyBuffer( - msg, src_memory_type, src_memory_type_id, dst_memory_type, - dst_memory_type_id, byte_size, src, dst, cuda_stream, &cuda_used); - completion_queue->Put(std::make_tuple(status, cuda_used, response_ptr)); -} - -#ifdef TRITON_ENABLE_GPU -Status -CheckGPUCompatibility(const int gpu_id, const double min_compute_capability) -{ - // Query the compute capability from the device - cudaDeviceProp cuprops; - cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, gpu_id); - if (cuerr != cudaSuccess) { - return Status( - Status::Code::INTERNAL, - "unable to get CUDA device properties for GPU ID" + - std::to_string(gpu_id) + ": " + cudaGetErrorString(cuerr)); - } - - double compute_compability = cuprops.major + (cuprops.minor / 10.0); - if ((compute_compability > min_compute_capability) || - (abs(compute_compability - min_compute_capability) < 0.01)) { - return Status::Success; - } else { - return Status( - 
Status::Code::UNSUPPORTED, - "gpu " + std::to_string(gpu_id) + " has compute capability '" + - std::to_string(cuprops.major) + "." + - std::to_string(cuprops.minor) + - "' which is less than the minimum supported of '" + - std::to_string(min_compute_capability) + "'"); - } -} - -Status -GetSupportedGPUs( - std::set* supported_gpus, const double min_compute_capability) -{ - // Make sure set is empty before starting - supported_gpus->clear(); - - int device_cnt; - cudaError_t cuerr = cudaGetDeviceCount(&device_cnt); - if ((cuerr == cudaErrorNoDevice) || (cuerr == cudaErrorInsufficientDriver)) { - device_cnt = 0; - } else if (cuerr != cudaSuccess) { - return Status( - Status::Code::INTERNAL, "unable to get number of CUDA devices: " + - std::string(cudaGetErrorString(cuerr))); - } - - // populates supported_gpus - for (int gpu_id = 0; gpu_id < device_cnt; gpu_id++) { - Status status = CheckGPUCompatibility(gpu_id, min_compute_capability); - if (status.IsOk()) { - supported_gpus->insert(gpu_id); - } - } - return Status::Success; -} - -Status -SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support) -{ - // Query the device to check if integrated - cudaDeviceProp cuprops; - cudaError_t cuerr = cudaGetDeviceProperties(&cuprops, gpu_id); - if (cuerr != cudaSuccess) { - return Status( - Status::Code::INTERNAL, - "unable to get CUDA device properties for GPU ID" + - std::to_string(gpu_id) + ": " + cudaGetErrorString(cuerr)); - } - - // Zero-copy supported only on integrated GPU when it can map host memory - if (cuprops.integrated && cuprops.canMapHostMemory) { - *zero_copy_support = true; - } else { - *zero_copy_support = false; - } - - return Status::Success; -} - -#endif - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/cuda_utils.h b/3rdparty/core-r22.12/src/cuda_utils.h deleted file mode 100644 index abe900be3d5720e9acb328501e03bd45fded5187..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/cuda_utils.h +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "status.h" -#include "triton/common/sync_queue.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace core { - -#ifdef TRITON_ENABLE_GPU -#define RETURN_IF_CUDA_ERR(X, MSG) \ - do { \ - cudaError_t err__ = (X); \ - if (err__ != cudaSuccess) { \ - return Status( \ - Status::Code::INTERNAL, (MSG) + ": " + cudaGetErrorString(err__)); \ - } \ - } while (false) -#endif // TRITON_ENABLE_GPU - -#ifndef TRITON_ENABLE_GPU -using cudaStream_t = void*; -#endif // !TRITON_ENABLE_GPU - -/// Get the memory info for the specified device. -/// \param device_id The device ID. -/// \param free Return free memory in bytes. -/// \param total Return total memory in bytes. -/// \return The error status. A non-OK status means failure to get memory info. -Status GetDeviceMemoryInfo(const int device_id, size_t* free, size_t* total); - -/// Enable peer access for all GPU device pairs -/// \param min_compute_capability The minimum support CUDA compute -/// capability. -/// \return The error status. A non-OK status means not all pairs are enabled -Status EnablePeerAccess(const double min_compute_capability); - -/// Copy buffer from 'src' to 'dst' for given 'byte_size'. The buffer location -/// is identified by the memory type and id, and the corresponding copy will be -/// initiated. -/// \param msg The message to be prepended in error message. -/// \param src_memory_type The memory type CPU/GPU of the source. -/// \param src_memory_type_id The device id of the source. -/// \param dst_memory_type The memory type CPU/GPU of the destination. -/// \param dst_memory_type_id The device id of the destination. -/// \param byte_size The size in bytes to me copied from source to destination. -/// \param src The buffer start address of the source. -/// \param dst The buffer start address of the destination. -/// \param cuda_stream The stream to be associated with, and 0 can be -/// passed for default stream. -/// \param cuda_used returns whether a CUDA memory copy is initiated. If true, -/// the caller should synchronize on the given 'cuda_stream' to ensure data copy -/// is completed. -/// \param copy_on_stream whether the memory copies should be performed in cuda -/// host functions on the 'cuda_stream'. -/// \return The error status. A non-ok status indicates failure to copy the -/// buffer. -Status CopyBuffer( - const std::string& msg, const TRITONSERVER_MemoryType src_memory_type, - const int64_t src_memory_type_id, - const TRITONSERVER_MemoryType dst_memory_type, - const int64_t dst_memory_type_id, const size_t byte_size, const void* src, - void* dst, cudaStream_t cuda_stream, bool* cuda_used, - bool copy_on_stream = false); - -#ifdef TRITON_ENABLE_GPU -/// Validates the compute capability of the GPU indexed -/// \param gpu_id The index of the target GPU. -/// \param min_compute_capability The minimum support CUDA compute -/// capability. -/// \return The error status. 
A non-OK status means the target GPU is -/// not supported. -Status CheckGPUCompatibility( - const int gpu_id, const double min_compute_capability); - -/// Obtains a set of gpu ids that is supported by triton. -/// \param supported_gpus Returns the set of integers which is -/// populated by ids of supported GPUS -/// \param min_compute_capability The minimum support CUDA compute -/// capability. -/// \return The error status. A non-ok status means there were -/// errors encountered while querying GPU devices. -Status GetSupportedGPUs( - std::set* supported_gpus, const double min_compute_capability); - -/// Checks if the GPU specified is an integrated GPU and supports Zero-copy. -/// \param gpu_id The index of the target GPU. -/// \param zero_copy_support If true, Zero-copy is supported by this GPU. -/// \return The error status. A non-OK status means the target GPU is -/// not supported. -Status SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support); -#endif - -// Helper around CopyBuffer that updates the completion queue with the returned -// status and cuda_used flag. -void CopyBufferHandler( - const std::string& msg, const TRITONSERVER_MemoryType src_memory_type, - const int64_t src_memory_type_id, - const TRITONSERVER_MemoryType dst_memory_type, - const int64_t dst_memory_type_id, const size_t byte_size, const void* src, - void* dst, cudaStream_t cuda_stream, void* response_ptr, - triton::common::SyncQueue>* - completion_queue); - -struct CopyParams { - CopyParams(void* dst, const void* src, const size_t byte_size) - : dst_(dst), src_(src), byte_size_(byte_size) - { - } - - void* dst_; - const void* src_; - const size_t byte_size_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/dynamic_batch_scheduler.cc b/3rdparty/core-r22.12/src/dynamic_batch_scheduler.cc deleted file mode 100644 index c608aa3709a2ece50eb8d02e9a52faf60b6109b9..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/dynamic_batch_scheduler.cc +++ /dev/null @@ -1,698 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "dynamic_batch_scheduler.h" - -#ifndef _WIN32 -#include -#include -#include -#endif -#include "constants.h" -#include "server.h" -#include "triton/common/logging.h" -#include "triton/common/model_config.h" -#include "triton/common/nvtx.h" - -namespace triton { namespace core { - -bool -IsStaleState(Payload::State payload_state) -{ - return ( - (payload_state == Payload::State::EXECUTING) || - (payload_state == Payload::State::RELEASED)); -} - -DynamicBatchScheduler::DynamicBatchScheduler( - TritonModel* model, TritonModelInstance* model_instance, - const bool dynamic_batching_enabled, const int32_t max_batch_size, - const std::unordered_map& enforce_equal_shape_tensors, - const bool preserve_ordering, const bool response_cache_enable, - const std::set& preferred_batch_sizes, - const uint64_t max_queue_delay_microseconds, - const inference::ModelQueuePolicy& default_queue_policy, - const uint32_t priority_levels, const ModelQueuePolicyMap& queue_policy_map) - : model_(model), model_instance_(model_instance), - model_name_(model->Name()), - dynamic_batching_enabled_(dynamic_batching_enabled), - queue_(default_queue_policy, priority_levels, queue_policy_map), - stop_(false), max_batch_size_((size_t)std::max(1, max_batch_size)), - preferred_batch_sizes_(preferred_batch_sizes), - pending_batch_delay_ns_(max_queue_delay_microseconds * 1000), - pending_batch_size_(0), queued_batch_size_(0), - next_preferred_batch_size_(0), - enforce_equal_shape_tensors_(enforce_equal_shape_tensors), - has_optional_input_(false), preserve_ordering_(preserve_ordering) -{ - rate_limiter_ = model_->Server()->GetRateLimiter(); - // Both the server and model config should specify - // caching enabled for model to utilize response cache. 
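IsStaleState() above marks EXECUTING and RELEASED payloads as unusable; the scheduler later combines it with a saturation flag to decide when to start a fresh payload. A trimmed-down sketch of that check, with the payload reduced to a bare struct for illustration:

```cpp
// Simplified sketch of the "start a new payload when the current one is
// saturated or stale" check used later in Enqueue()/BatcherThread().
// The real code holds the payload's exec mutex and asks the rate limiter
// for a new payload; the types here are cut down for illustration only.
#include <memory>

enum class PayloadState { UNINITIALIZED, READY, EXECUTING, RELEASED };

static bool IsStale(PayloadState s)
{
  return (s == PayloadState::EXECUTING) || (s == PayloadState::RELEASED);
}

struct Payload {
  PayloadState state = PayloadState::UNINITIALIZED;
};

void RefreshPayloadIfNeeded(std::shared_ptr<Payload>& current, bool saturated)
{
  if (saturated || IsStale(current->state)) {
    current = std::make_shared<Payload>();  // stands in for NewPayload()
  }
}
```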
- response_cache_enabled_ = - (model_->Server()->ResponseCacheEnabled() && response_cache_enable); -#ifdef TRITON_ENABLE_METRICS - // Initialize metric reporter for cache statistics if cache enabled - if (response_cache_enabled_) { - MetricModelReporter::Create( - model_name_, model_->Version(), METRIC_REPORTER_ID_RESPONSE_CACHE, - model_->Config().metric_tags(), &reporter_); - } -#endif // TRITON_ENABLE_METRICS - max_preferred_batch_size_ = 0; - for (const auto size : preferred_batch_sizes_) { - max_preferred_batch_size_ = - std::max(max_preferred_batch_size_, (size_t)size); - } - - for (const auto& input : model_->Config().input()) { - if (input.optional()) { - has_optional_input_ = true; - break; - } - } -} - -Status -DynamicBatchScheduler::Create( - TritonModel* model, TritonModelInstance* model_instance, const int nice, - const bool dynamic_batching_enabled, const int32_t max_batch_size, - const std::unordered_map& enforce_equal_shape_tensors, - const bool preserve_ordering, const bool response_cache_enable, - const std::set& preferred_batch_sizes, - const uint64_t max_queue_delay_microseconds, - std::unique_ptr* scheduler) -{ - inference::ModelDynamicBatching batcher_config; - batcher_config.set_preserve_ordering(preserve_ordering); - for (const auto& bs : preferred_batch_sizes) { - batcher_config.add_preferred_batch_size(bs); - } - batcher_config.set_max_queue_delay_microseconds(max_queue_delay_microseconds); - - return Create( - model, model_instance, nice, dynamic_batching_enabled, max_batch_size, - enforce_equal_shape_tensors, batcher_config, response_cache_enable, - scheduler); -} - -Status -DynamicBatchScheduler::Create( - TritonModel* model, TritonModelInstance* model_instance, const int nice, - const bool dynamic_batching_enabled, const int32_t max_batch_size, - const std::unordered_map& enforce_equal_shape_tensors, - const inference::ModelDynamicBatching& batcher_config, - const bool response_cache_enable, std::unique_ptr* scheduler) -{ - std::set preferred_batch_sizes; - for (const auto size : batcher_config.preferred_batch_size()) { - preferred_batch_sizes.insert(size); - } - - DynamicBatchScheduler* dyna_sched = new DynamicBatchScheduler( - model, model_instance, dynamic_batching_enabled, max_batch_size, - enforce_equal_shape_tensors, batcher_config.preserve_ordering(), - response_cache_enable, preferred_batch_sizes, - batcher_config.max_queue_delay_microseconds(), - batcher_config.default_queue_policy(), batcher_config.priority_levels(), - batcher_config.priority_queue_policy()); - std::unique_ptr sched(dyna_sched); - - sched->scheduler_thread_exit_.store(false); - if (dynamic_batching_enabled) { - sched->NewPayload(); - sched->scheduler_thread_ = - std::thread([dyna_sched, nice]() { dyna_sched->BatcherThread(nice); }); - } - - scheduler->reset(sched.release()); - - return Status::Success; -} - -DynamicBatchScheduler::~DynamicBatchScheduler() -{ - // Signal the scheduler thread to exit and then wait for it.. - scheduler_thread_exit_.store(true); - cv_.notify_one(); - if (scheduler_thread_.joinable()) { - scheduler_thread_.join(); - } -} - -Status -DynamicBatchScheduler::Enqueue(std::unique_ptr& request) -{ - if (stop_) { - return Status( - Status::Code::UNAVAILABLE, - request->LogRequest() + - "Server is stopping, scheduler for model has stopped accepting new " - "inference requests"); - } - // If queue start timestamp hasn't been set, queue timer starts at - // the beginning of the queueing and scheduling process. 
Otherwise, - // dynamic batcher is used as component of another batcher and should not - // overwrite the queue start timestamp. - if (request->QueueStartNs() == 0) { - request->CaptureQueueStartNs(); - INFER_TRACE_ACTIVITY( - request->Trace(), TRITONSERVER_TRACE_QUEUE_START, - request->QueueStartNs()); -#ifdef TRITON_ENABLE_TRACING - request->TraceInputTensors( - TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT, "DynamicBatchScheduler Enqueue"); -#endif // TRITON_ENABLE_TRACING - } - - // Record time at the beginning of the batcher queueing. In the case of - // oldest sequence batcher, this will overwrite the value that was previously - // set by sequence batcher, which is okay as by this point, the previous - // batcher won't be needing this value and it can be safely reused by - // the dynamic batcher. - request->CaptureBatcherStartNs(); - - std::unique_ptr cached_response; - - if (response_cache_enabled_) { - CacheLookUp(request, cached_response); - } - - if (cached_response != nullptr) { - // If there was a cache hit then try sending the cached response - // and release the request. - if (preserve_ordering_) { - // In order to preserve the order, the response send must be - // delegated. - DelegateResponse(request); - } - - // Send cached response and release request - InferenceResponse::Send( - std::move(cached_response), TRITONSERVER_RESPONSE_COMPLETE_FINAL); - InferenceRequest::Release( - std::move(request), TRITONSERVER_REQUEST_RELEASE_ALL); - - return Status::Success; - } - - if (!dynamic_batching_enabled_) { - if (preserve_ordering_ || response_cache_enabled_) { - DelegateResponse(request); - } - // If not using dynamic batching, directly enqueue the - // request to model for execution - auto payload = model_->Server()->GetRateLimiter()->GetPayload( - Payload::Operation::INFER_RUN, nullptr /* TritonModelInstance*/); - payload->AddRequest(std::move(request)); - RETURN_IF_ERROR( - model_->Server()->GetRateLimiter()->EnqueuePayload(model_, payload)); - - } else { - bool wake_batcher = true; - { - std::lock_guard lock(mu_); - - queued_batch_size_ += std::max(1U, request->BatchSize()); - - // Assuming no error is returned, this call takes ownership of - // 'request' and so we can't use it after this point. - RETURN_IF_ERROR(queue_.Enqueue(request->Priority(), request)); - - // If there are any idle runners and the queued batch size is greater or - // equal to next preferred batch size, then wake batcher up to service - // this request. 
We do the actual wake outside of the lock to avoid - // having the woken thread immediately block on the lock - wake_batcher = - model_->Server()->GetRateLimiter()->PayloadSlotAvailable(model_); - - // We may wake up runner less often if we don't enforce equal shape - // within a batch, otherwise must always wake up runner to check it - if (enforce_equal_shape_tensors_.empty()) { - std::lock_guard exec_lock(*(curr_payload_->GetExecMutex())); - auto payload_state = curr_payload_->GetState(); - wake_batcher &= - (payload_saturated_ || IsStaleState(payload_state) || - (queued_batch_size_ >= next_preferred_batch_size_)); - } - } - - if (wake_batcher) { - cv_.notify_one(); - } - } - - return Status::Success; -} - -void -DynamicBatchScheduler::NewPayload() -{ - curr_payload_ = model_->Server()->GetRateLimiter()->GetPayload( - Payload::Operation::INFER_RUN, model_instance_); - payload_saturated_ = false; -} - -void -DynamicBatchScheduler::BatcherThread(const int nice) -{ -#ifndef _WIN32 - if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) { - LOG_VERBOSE(1) << "Starting dynamic-batcher thread for " << model_name_ - << " at nice " << nice << "..."; - } else { - LOG_VERBOSE(1) << "Starting dynamic-batcher thread for " << model_name_ - << " at default nice (requested nice " << nice - << " failed)..."; - } -#else - LOG_VERBOSE(1) << "Starting dynamic-batcher thread for " << model_name_ - << " at default nice..."; -#endif - // For debugging/testing, delay start of threads until the queue - // contains the specified number of entries. - size_t delay_cnt = 0; - { - const char* dstr = getenv("TRITONSERVER_DELAY_SCHEDULER"); - if (dstr != nullptr) { - delay_cnt = atoi(dstr); - LOG_VERBOSE(1) << "Delaying batcher thread for " << model_name_ - << " until " << delay_cnt << " queued requests..."; - } - } - - auto wait_for_slots = [this]() { - return model_->Server()->GetRateLimiter()->PayloadSlotAvailable(model_); - }; - const uint64_t default_wait_microseconds = 500 * 1000; - - while (!scheduler_thread_exit_.load()) { - NVTX_RANGE(nvtx_, "DynamicBatcher " + model_name_); - - std::shared_ptr>>> - rejected_requests; - uint64_t wait_microseconds = 0; - - // Hold the lock for as short a time as possible. - { - std::unique_lock lock(mu_); - { - std::lock_guard exec_lock(*(curr_payload_->GetExecMutex())); - auto payload_state = curr_payload_->GetState(); - if (payload_saturated_ || IsStaleState(payload_state)) { - NewPayload(); - next_preferred_batch_size_ = 0; - } - } - - if (delay_cnt > 0) { - // Debugging/testing... wait until queue contains 'delay_cnt' - // items... - wait_microseconds = 10 * 1000; - if (queue_.Size() >= delay_cnt) { - delay_cnt = 0; - } - LOG_VERBOSE(1) << "Delaying batcher thread " << model_name_ << " until " - << delay_cnt - << " queued requests, current total = " << queue_.Size(); - } else if (queue_.Empty()) { - wait_microseconds = default_wait_microseconds; - } else { - if (payload_saturated_) { - continue; - } - cv_.wait(lock, wait_for_slots); - { - std::lock_guard exec_lock( - *(curr_payload_->GetExecMutex())); - - auto payload_state = curr_payload_->GetState(); - if (IsStaleState(payload_state)) { - continue; - } - - // Use dynamic batching to get request(s) to execute. - wait_microseconds = GetDynamicBatch(); - - // Get requests that are rejected from searching dynamic batch. 
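The Enqueue() comment above explains that the batcher is woken outside the lock so the woken thread does not immediately block on it. A minimal, standalone sketch of that idiom (plain C++, not Triton code):

```cpp
// Minimal sketch of "notify outside the lock", as described in Enqueue():
// the queue is updated under the mutex, but notify_one() is called only
// after the lock has been released so the woken consumer can acquire it
// immediately instead of blocking.
#include <condition_variable>
#include <deque>
#include <mutex>

std::mutex mu;
std::condition_variable cv;
std::deque<int> queue;

void Produce(int item)
{
  bool wake = false;
  {
    std::lock_guard<std::mutex> lock(mu);
    queue.push_back(item);
    wake = (queue.size() == 1);  // the queue was empty before this push
  }
  if (wake) {
    cv.notify_one();  // outside the critical section
  }
}
```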
- queue_.ReleaseRejectedRequests(&rejected_requests); - - // Extract batch only if there is pending batch - auto pending_batch_queue_cnt = queue_.PendingBatchCount(); - if ((wait_microseconds == 0) && (pending_batch_queue_cnt != 0)) { - curr_payload_->ReserveRequests(pending_batch_queue_cnt); - for (size_t idx = 0; idx < pending_batch_queue_cnt; ++idx) { - std::unique_ptr request; - auto status = queue_.Dequeue(&request); - if (status.IsOk()) { - if (preserve_ordering_ || response_cache_enabled_) { - DelegateResponse(request); - } - curr_payload_->AddRequest(std::move(request)); - } else { - // The queue is empty which conflicts with pending batch - // count. Send the current batch if any and reset related - // variables. - LOG_ERROR << request->LogRequest() - << "Failed to retrieve request from scheduler queue: " - << status.Message(); - queue_.ResetCursor(); - queued_batch_size_ = 0; - pending_batch_size_ = 0; - break; - } - } - - if (curr_payload_->GetState() == Payload::State::UNINITIALIZED) { - curr_payload_->SetState(Payload::State::READY); - } - - queued_batch_size_ -= pending_batch_size_; - pending_batch_size_ = 0; - } - } - } - - // If no requests are to be handled, wait for notification or - // for the specified timeout before checking the queue again. - if (wait_microseconds > 0) { - std::chrono::microseconds wait_timeout(wait_microseconds); - cv_.wait_for(lock, wait_timeout); - } - } - - if (curr_payload_->GetState() == Payload::State::READY) { - auto callback = [this]() { cv_.notify_one(); }; - curr_payload_->SetCallback(callback); - model_->Server()->GetRateLimiter()->EnqueuePayload(model_, curr_payload_); - } - - // Finish rejected requests if any - if (rejected_requests != nullptr) { - static Status rejected_status = - Status(Status::Code::UNAVAILABLE, "Request timeout expired"); - for (auto& rejected_queue : *rejected_requests) { - for (auto& rejected_request : rejected_queue) { - InferenceRequest::RespondIfError( - rejected_request, rejected_status, true); - } - } - } - } // end runner loop - - LOG_VERBOSE(1) << "Stopping dynamic-batcher thread for " << model_name_ - << "..."; -} - -uint64_t -DynamicBatchScheduler::GetDynamicBatch() -{ - // 'mu_' mutex must be held when this function is called. queue_ - // must not be empty. - - // Examine the new requests. If adding these new requests to the - // pending batch allows a preferred batch size then execute it - // immediately. Stop examining requests if the maximum preferred - // batch size would be exceeded or if the shape of the next request - // does not match the shape of the pending batch. - bool send_now = false; - if (!queue_.IsCursorValid()) { - queue_.ResetCursor(); - pending_batch_size_ = 0; - } - size_t best_preferred_batch_size = 0; - queued_batch_size_ -= queue_.ApplyPolicyAtCursor(); - - // When there is optional input or input shape must be enforced, - // the inputs in the requests must be examined for forming a batch - const bool check_input = - !enforce_equal_shape_tensors_.empty() || has_optional_input_; - auto payload_batch_size = curr_payload_->BatchSize(); - while (!queue_.CursorEnd()) { - const auto batch_size = std::max(1U, queue_.RequestAtCursor()->BatchSize()); - - // If there is no pending batch, then this request is starting a - // new batch. - if ((payload_batch_size + queue_.PendingBatchCount()) == 0) { - // Get the shape of the new batch that is being started... 
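GetDynamicBatch() above scans the queue and decides between executing the pending batch now and waiting out the queue delay. A much-simplified, size-only sketch of that decision; it ignores the shape checks, priorities, and per-request timeouts the real scheduler also applies:

```cpp
// Simplified, size-only sketch of the batching decision described above
// GetDynamicBatch(). Returns true if the pending batch should run now,
// false if the scheduler should keep waiting for more requests.
#include <cstddef>
#include <cstdint>
#include <set>

bool ShouldExecuteNow(
    size_t pending_batch_size, size_t max_batch_size,
    const std::set<size_t>& preferred_batch_sizes,
    uint64_t pending_delay_ns, uint64_t max_queue_delay_ns)
{
  // A preferred batch size has been reached exactly: run it.
  if (preferred_batch_sizes.count(pending_batch_size) > 0) {
    return true;
  }
  // The batch cannot grow any further: run whatever is pending.
  if (pending_batch_size >= max_batch_size) {
    return true;
  }
  // The oldest queued request has waited longer than the configured delay.
  if ((max_queue_delay_ns != 0) && (pending_delay_ns >= max_queue_delay_ns)) {
    return true;
  }
  return false;
}
```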
- if (check_input) { - if (!curr_payload_->MutableRequiredEqualInputs() - ->Initialize( - queue_.RequestAtCursor(), enforce_equal_shape_tensors_, - has_optional_input_) - .IsOk()) { - send_now = true; - break; - } - } - } else { - // There is a pending batch and adding this request would make - // the batch size larger than all of the preferred batch sizes, - // so mark the cursor at this point. Not sending the pending batch so - // that we can examine the queue delay of requests that fits in a batch. - if (((payload_batch_size + pending_batch_size_ + batch_size) > - max_preferred_batch_size_) && - (best_preferred_batch_size == 0)) { - best_preferred_batch_size = pending_batch_size_; - queue_.MarkCursor(); - payload_saturated_ = true; - } - if ((payload_batch_size + pending_batch_size_ + batch_size) > - max_batch_size_) { - send_now = true; - break; - } - - // There is a pending batch and it has a different shape then - // this request, so send the pending batch as it is. - if (check_input && - !curr_payload_->MutableRequiredEqualInputs()->HasEqualInputs( - queue_.RequestAtCursor())) { - curr_payload_->MarkSaturated(); - send_now = true; - break; - } - } - - pending_batch_size_ += batch_size; - queue_.AdvanceCursor(); - queued_batch_size_ -= queue_.ApplyPolicyAtCursor(); - - if (preferred_batch_sizes_.find(pending_batch_size_ + payload_batch_size) != - preferred_batch_sizes_.end()) { - best_preferred_batch_size = pending_batch_size_; - queue_.MarkCursor(); - } - } - - // Obatin the age of the oldest pending request to compare with the maximum - // batch queuing delay - uint64_t now_ns = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - uint64_t delay_ns = now_ns - queue_.OldestEnqueueTime(); - bool delay_is_exceeded = - (pending_batch_delay_ns_ != 0) && (delay_ns >= pending_batch_delay_ns_); - - // If we found a preferred batch size and the queue delay hasn't been - // exceeded, then execute that. - if ((best_preferred_batch_size != 0) && !delay_is_exceeded) { - if (pending_batch_delay_ns_ == 0) { - payload_saturated_ = true; - } - pending_batch_size_ = best_preferred_batch_size; - queue_.SetCursorToMark(); - return 0; - } - - // No request in pending batch happens when all queued requests have expired - // timeout and the policies are REJECT - if (queue_.PendingBatchCount() == 0) { - return 0; - } - - // If the delay has been exceeded, or if the current batch can't grow - // any larger then just immediately execute whatever is pending. - if (send_now || ((payload_batch_size + pending_batch_size_) >= - max_preferred_batch_size_)) { - payload_saturated_ = true; - return 0; - } - - if (delay_is_exceeded || (pending_batch_delay_ns_ == 0)) { - return 0; - } - - // Set the next preferred batch size given the pending batch size - auto next_preferred_batch_size_it = preferred_batch_sizes_.upper_bound( - pending_batch_size_ + payload_batch_size); - if (next_preferred_batch_size_it != preferred_batch_sizes_.end()) { - next_preferred_batch_size_ = *next_preferred_batch_size_it; - } else { - next_preferred_batch_size_ = - preferred_batch_sizes_.empty() ? 0 : *preferred_batch_sizes_.begin(); - } - if (next_preferred_batch_size_ != 0) { - next_preferred_batch_size_ -= payload_batch_size; - } - - // By this point, we have not seen the pending batch that should be executed - // immediately. 
However, if we have scheduled a payload that can be grown and - // not yet in preferred batch size, we should move the pending batch over to - // ensure the model instance will pick up largest available batch even if it - // is not the preferred batch. - if (!payload_saturated_ && (payload_batch_size != 0) && - (preferred_batch_sizes_.find(payload_batch_size) == - preferred_batch_sizes_.end())) { - return 0; - } - - uint64_t wait_ns = pending_batch_delay_ns_ - delay_ns; - // Note that taking request timeout into consideration allows us to reset - // pending batch as soon as it is invalidated. But the cost is that in edge - // case where the timeout will be expired one by one, the thread will be - // waken frequently. - if (queue_.ClosestTimeout() != 0) { - if (now_ns <= queue_.ClosestTimeout()) { - wait_ns = std::min(queue_.ClosestTimeout() - now_ns, wait_ns); - } else { - // A request in pending batch is timed-out, wait for 1 us to force the - // thread to reset the pending batch right the way. - wait_ns = 1000; - } - } - - // Return non-zero wait microseconds to cause this thread to wait - // until the queue delay or the closest timeout has expired. - // Another thread may be awaken due to incoming request to handle the - // pending batch before this thread wakes and that is ok. But if no other - // request comes in then this thread will wake and revisit the pending batch - // (and at that time will then see the delay has been exceeded and will send - // the batch). - return wait_ns / 1000; -} - -void -DynamicBatchScheduler::DelegateResponse( - std::unique_ptr& request) -{ - std::lock_guard lock(completion_queue_mtx_); - completion_queue_.emplace_back(); - auto queue_slot = &completion_queue_.back(); - // Pass raw ptr to lambda for tracking stats from cache and updating - // metric reporter on cache miss stats after insertion - InferenceRequest* raw_request_ptr = request.get(); - - request->SetResponseDelegator( - [this, queue_slot, raw_request_ptr]( - std::unique_ptr&& response, const uint32_t flags) { - if (response_cache_enabled_ && raw_request_ptr->CacheKeyIsSet()) { - // Cache insertion happens here because we need the backend to have - // computed the inference response first in the case of cache miss - auto cache = model_->Server()->GetResponseCache(); - auto status = cache->Insert(*response, raw_request_ptr); - bool cache_miss = - (status.StatusCode() != Status::Code::ALREADY_EXISTS); - if (cache_miss) { -#ifdef TRITON_ENABLE_STATS - // Update cache miss statistics even on failure to insert - // as we still spend time on lookup and attempting to insert - raw_request_ptr->ReportStatisticsCacheMiss(reporter_.get()); -#endif // TRITON_ENABLE_STATS - - if (!status.IsOk()) { - LOG_ERROR << raw_request_ptr->LogRequest() - << "Failed to insert request_hash [" - << raw_request_ptr->CacheKey() - << "] into response cache: " << status.Message(); - } - } // Otherwise do nothing; we update cache hit statistics on Lookup - } - - if (preserve_ordering_) { - { - std::lock_guard lock(completion_queue_mtx_); - queue_slot->emplace_back(std::move(response), flags); - } - FinalizeResponses(); - } else { - InferenceResponse::Send(std::move(response), flags); - } - }); -} - -void -DynamicBatchScheduler::CacheLookUp( - std::unique_ptr& request, - std::unique_ptr& cached_response) -{ - auto cache = model_->Server()->GetResponseCache(); - // Lookup request in cache - std::unique_ptr local_response; - request->ResponseFactory()->CreateResponse(&local_response); - auto status = 
cache->Lookup(local_response.get(), request.get()); - if (status.IsOk() && (local_response != nullptr)) { - cached_response = std::move(local_response); -#ifdef TRITON_ENABLE_STATS - // Update model metrics/stats on cache hits - // Backends will update metrics as normal on cache misses - request->ReportStatisticsCacheHit(reporter_.get()); -#endif // TRITON_ENABLE_STATS - } -} - -void -DynamicBatchScheduler::FinalizeResponses() -{ - // Need exclusive access of the function to ensure responses are sent - // in order - std::lock_guard lock(finalize_mtx_); - // Finalize the completed payloads in-order as far as possible - std::deque, const uint32_t>> - responses; - { - std::lock_guard queue_lock(completion_queue_mtx_); - while (!completion_queue_.empty() && !completion_queue_.front().empty()) { - bool response_complete = false; - for (auto& response_pair : completion_queue_.front()) { - // Assuming FINAL flag is set only in the last response of the request - response_complete = - ((response_pair.second & TRITONSERVER_RESPONSE_COMPLETE_FINAL) != - 0); - responses.emplace_back(std::move(response_pair)); - } - if (response_complete) { - completion_queue_.pop_front(); - } else { - completion_queue_.front().clear(); - } - } - } - - for (auto& response : responses) { - InferenceResponse::Send(std::move(response.first), response.second); - } -} -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/dynamic_batch_scheduler.h b/3rdparty/core-r22.12/src/dynamic_batch_scheduler.h deleted file mode 100644 index 16818a9dcbcff78ae4ea6b5d720313b395079807..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/dynamic_batch_scheduler.h +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
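FinalizeResponses() above drains per-request completion slots in order so responses are sent in the order requests arrived. A reduced sketch of that idea, with responses stood in by strings and the FINAL flag by a plain bit:

```cpp
// Simplified sketch of the in-order finalization done by FinalizeResponses():
// responses are parked in per-request slots and only forwarded once every
// earlier request has produced its final response. The real code moves
// InferenceResponse objects and checks TRITONSERVER_RESPONSE_COMPLETE_FINAL.
#include <cstdint>
#include <deque>
#include <string>
#include <utility>
#include <vector>

constexpr uint32_t kFinalFlag = 0x1;
using Slot = std::vector<std::pair<std::string, uint32_t>>;

void Finalize(std::deque<Slot>& completion_queue, std::vector<std::string>* sent)
{
  while (!completion_queue.empty() && !completion_queue.front().empty()) {
    bool final_seen = false;
    for (auto& response : completion_queue.front()) {
      final_seen = ((response.second & kFinalFlag) != 0);
      sent->push_back(std::move(response.first));
    }
    if (final_seen) {
      completion_queue.pop_front();      // request fully answered, move on
    } else {
      completion_queue.front().clear();  // keep the slot for later responses
    }
  }
}
```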
-#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "backend_model.h" -#include "backend_model_instance.h" -#include "model_config.pb.h" -#include "rate_limiter.h" -#include "scheduler.h" -#include "scheduler_utils.h" -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -// Scheduler that implements dynamic batching. -class DynamicBatchScheduler : public Scheduler { - public: - // Create a scheduler to support a given number of runners and a run - // function to call when a request is scheduled. - static Status Create( - TritonModel* model, TritonModelInstance* model_instance, const int nice, - const bool dynamic_batching_enabled, const int32_t max_batch_size, - const std::unordered_map& enforce_equal_shape_tensors, - const bool preserve_ordering, const bool response_cache_enable, - const std::set& preferred_batch_sizes, - const uint64_t max_queue_delay_microseconds, - std::unique_ptr* scheduler); - - // Create a scheduler to support a given number of runners and a run - // function to call when a request is scheduled. And the scheduler also - // supports different queue policies for different priority levels. - static Status Create( - TritonModel* model, TritonModelInstance* model_instance, const int nice, - const bool dynamic_batching_enabled, const int32_t max_batch_size, - const std::unordered_map& enforce_equal_shape_tensors, - const inference::ModelDynamicBatching& batcher_config, - const bool response_cache_enable, std::unique_ptr* scheduler); - - ~DynamicBatchScheduler(); - - // \see Scheduler::Enqueue() - Status Enqueue(std::unique_ptr& request) override; - - // \see Scheduler::InflightInferenceCount() - size_t InflightInferenceCount() override - { - std::unique_lock lock(mu_); - if (curr_payload_ != nullptr) { - return queue_.Size() + curr_payload_->RequestCount(); - } - return queue_.Size(); - } - - // \see Scheduler::Stop() - void Stop() override { stop_ = true; } - - MetricModelReporter* MetricReporter() const { return reporter_.get(); } - - private: - DynamicBatchScheduler( - TritonModel* model, TritonModelInstance* model_instance, - const bool dynamic_batching_enabled, const int32_t max_batch_size, - const std::unordered_map& enforce_equal_shape_tensors, - const bool preserve_ordering, const bool response_cache_enable, - const std::set& preferred_batch_sizes, - const uint64_t max_queue_delay_microseconds, - const inference::ModelQueuePolicy& default_queue_policy, - const uint32_t priority_levels, - const ModelQueuePolicyMap& queue_policy_map); - - void BatcherThread(const int nice); - void NewPayload(); - uint64_t GetDynamicBatch(); - void DelegateResponse(std::unique_ptr& request); - void CacheLookUp( - std::unique_ptr& request, - std::unique_ptr& cached_response); - void FinalizeResponses(); - - TritonModel* model_; - TritonModelInstance* model_instance_; - - // Name of the model. - std::string model_name_; - - // True if dynamic batching is enabled. - const bool dynamic_batching_enabled_; - - // Map from priority level to queue holding inference requests for the model - // represented by this scheduler. If priority queues are not supported by the - // scheduler, then priority zero entry is used as the single queue. 
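The member comment below describes a map from priority level to request queue, with priority zero serving as the single default queue. An illustrative, heavily simplified sketch of such a structure; Triton's PriorityQueue additionally applies per-level queue policies, timeouts, and a batch cursor:

```cpp
// Heavily simplified sketch of per-priority queueing. This sketch serves the
// lowest-numbered non-empty level first; priority 0 acts as the single
// default queue when priorities are not configured.
#include <cstdint>
#include <deque>
#include <map>
#include <string>
#include <utility>

struct Request {
  std::string id;
  uint32_t priority = 0;
};

class SimplePriorityQueue {
 public:
  void Enqueue(Request request)
  {
    queues_[request.priority].push_back(std::move(request));
  }

  // Dequeue from the first non-empty priority level, if any.
  bool Dequeue(Request* out)
  {
    for (auto& level : queues_) {
      if (!level.second.empty()) {
        *out = std::move(level.second.front());
        level.second.pop_front();
        return true;
      }
    }
    return false;
  }

 private:
  std::map<uint32_t, std::deque<Request>> queues_;
};
```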
- PriorityQueue queue_; - bool stop_; - - std::thread scheduler_thread_; - std::atomic scheduler_thread_exit_; - - // Mutex and condvar for signaling scheduler thread - std::mutex mu_; - std::condition_variable cv_; - - std::shared_ptr rate_limiter_; - - std::shared_ptr curr_payload_; - bool payload_saturated_; - - size_t max_batch_size_; - size_t max_preferred_batch_size_; - std::set preferred_batch_sizes_; - uint64_t pending_batch_delay_ns_; - size_t pending_batch_size_; - - size_t queued_batch_size_; - size_t next_preferred_batch_size_; - - // The input tensors that require shape checking before being - // allowed in a batch. As a map from the tensor name to a bool. If - // tensor is in map then its shape must match shape of same tensor - // in requests already in the batch. If value is "true" then - // additional tensor is treated as a shape tensor and the values - // contained in the shape tensor must match same tensor already in - // the batch. - const std::unordered_map enforce_equal_shape_tensors_; - - // Store information on whether the model contains optional inputs. - bool has_optional_input_; - - // If true the ordering of responses matches the order of requests - // even when there are multiple scheduler threads. - const bool preserve_ordering_; - - // If true, the scheduler will try to retrieve responses from cache. - bool response_cache_enabled_; - - // Per completion-id queues to store the ready responses - std::deque< - std::vector, uint32_t>>> - completion_queue_; - // Lock to protect the completion_queues_ - std::mutex completion_queue_mtx_; - - // Preserves the order in which responses are finalized - std::mutex finalize_mtx_; - - // Reporter for metrics, or nullptr if no metrics should be reported - std::shared_ptr reporter_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/ensemble_model.cc b/3rdparty/core-r22.12/src/ensemble_model.cc deleted file mode 100644 index b263a6512a78c2148b21f2648f1eb4c95f117113..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/ensemble_model.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "ensemble_model.h" - -#include -#include "constants.h" -#include "ensemble_scheduler.h" -#include "model_config_utils.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -Status -EnsembleModel::Create( - InferenceServer* server, const std::string& path, const int64_t version, - const inference::ModelConfig& model_config, const bool is_config_provided, - const double min_compute_capability, std::unique_ptr* model) -{ - // Create the ensemble model. - std::unique_ptr local_model( - new EnsembleModel(min_compute_capability, path, version, model_config)); - - RETURN_IF_ERROR(local_model->Init(is_config_provided)); - - std::unique_ptr scheduler; - RETURN_IF_ERROR(EnsembleScheduler::Create( - local_model->MutableStatsAggregator(), server, model_config, &scheduler)); - RETURN_IF_ERROR(local_model->SetScheduler(std::move(scheduler))); - - LOG_VERBOSE(1) << "ensemble model for " << local_model->Name() << std::endl; - - *model = std::move(local_model); - return Status::Success; -} - -std::ostream& -operator<<(std::ostream& out, const EnsembleModel& pb) -{ - out << "name=" << pb.Name() << std::endl; - return out; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/ensemble_model.h b/3rdparty/core-r22.12/src/ensemble_model.h deleted file mode 100644 index b24df739aad45f7e91cb904f6449ce9258d8c5e3..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/ensemble_model.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "model.h" -#include "model_config.pb.h" -#include "scheduler.h" -#include "status.h" - -namespace triton { namespace core { - -class InferenceServer; - -class EnsembleModel : public Model { - public: - EnsembleModel(EnsembleModel&&) = default; - - static Status Create( - InferenceServer* server, const std::string& path, const int64_t version, - const inference::ModelConfig& model_config, const bool is_config_provided, - const double min_compute_capability, std::unique_ptr* model); - - private: - DISALLOW_COPY_AND_ASSIGN(EnsembleModel); - - explicit EnsembleModel( - const double min_compute_capability, const std::string& model_dir, - const int64_t version, const inference::ModelConfig& config) - : Model(min_compute_capability, model_dir, version, config) - { - } - friend std::ostream& operator<<(std::ostream&, const EnsembleModel&); -}; - -std::ostream& operator<<(std::ostream& out, const EnsembleModel& pb); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/ensemble_scheduler.cc b/3rdparty/core-r22.12/src/ensemble_scheduler.cc deleted file mode 100644 index 76d520c3580d1ceb112f3b5def8c73286ec21e8a..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/ensemble_scheduler.cc +++ /dev/null @@ -1,1390 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
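The `EnsembleModel` declaration above follows the factory convention used throughout these sources: a static `Create()` that returns a `Status` and hands ownership back through a `std::unique_ptr` out-parameter, while the constructor stays private and copying is disallowed. A minimal, self-contained sketch of that convention follows; the `Widget` and `Status` names are illustrative stand-ins, not the Triton types.

```cpp
#include <iostream>
#include <memory>
#include <string>

// Stand-in for a Status-style result type (illustrative only).
struct Status {
    bool        ok = true;
    std::string msg;
    static Status Success() { return {}; }
};

class Widget {
  public:
    // Factory: validation that can fail reports an error instead of throwing
    // from a constructor; on success, ownership transfers to the caller.
    static Status Create(const std::string& name, std::unique_ptr<Widget>* out)
    {
        if (name.empty()) {
            return Status{false, "name must not be empty"};
        }
        out->reset(new Widget(name));
        return Status::Success();
    }

    const std::string& Name() const { return name_; }

  private:
    explicit Widget(const std::string& name): name_(name) {}  // not publicly constructible
    Widget(const Widget&) = delete;
    Widget& operator=(const Widget&) = delete;

    std::string name_;
};

int main()
{
    std::unique_ptr<Widget> w;
    Status                  s = Widget::Create("ensemble", &w);
    if (s.ok) {
        std::cout << "created " << w->Name() << "\n";
    }
    return 0;
}
```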
- -#ifdef TRITON_ENABLE_ENSEMBLE - -#include "ensemble_scheduler.h" - -#include -#include "cuda_utils.h" -#include "metrics.h" -#include "model.h" -#include "model_config_utils.h" -#include "server.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -namespace { - -class EnsembleContext; - -using IterationCount = size_t; - -// Request tracker is passed as 'userp' in RequestRelease function and used -// to manage the lifecycle of the ensemble request -class RequestTracker { - public: - explicit RequestTracker( - std::unique_ptr&& request, uint64_t compute_start_ns, - MetricModelReporter* metric_reporter, - InferenceStatsAggregator* stats_aggregator) - : inflight_request_counter_(1), request_(std::move(request)), - compute_start_ns_(compute_start_ns), metric_reporter_(metric_reporter), - stats_aggregator_(stats_aggregator), status_(Status::Success) - { - } - - std::unique_ptr& Request() { return request_; } - - InferenceStatsAggregator& ContextStatsAggregator() - { - return context_stats_aggregator_; - } - - void IncrementCounter() - { - std::lock_guard lk(mtx_); - inflight_request_counter_++; - } - - bool DecrementCounter() - { - std::lock_guard lk(mtx_); - inflight_request_counter_--; - if (inflight_request_counter_ == 0) { -#ifdef TRITON_ENABLE_STATS - const auto& infer_stats = context_stats_aggregator_.ImmutableInferStats(); - request_->ReportStatisticsWithDuration( - metric_reporter_, status_.IsOk(), compute_start_ns_, - infer_stats.compute_input_duration_ns_, - infer_stats.compute_infer_duration_ns_, - infer_stats.compute_output_duration_ns_); - if (status_.IsOk()) { - stats_aggregator_->UpdateInferBatchStatsWithDuration( - metric_reporter_, std::max(1U, request_->BatchSize()), - infer_stats.compute_input_duration_ns_, - infer_stats.compute_infer_duration_ns_, - infer_stats.compute_output_duration_ns_); - } -#endif - InferenceRequest::Release( - std::move(request_), TRITONSERVER_REQUEST_RELEASE_ALL); - } - return (inflight_request_counter_ == 0); - } - - void SetStatus(const Status& status) - { - std::lock_guard lk(mtx_); - status_ = status; - } - - private: - std::mutex mtx_; - uint32_t inflight_request_counter_; - std::unique_ptr request_; - uint64_t compute_start_ns_; - MetricModelReporter* metric_reporter_; - InferenceStatsAggregator* stats_aggregator_; - InferenceStatsAggregator context_stats_aggregator_; - Status status_; -}; - -// Step is used as 'userp' and keeps ensemble context alive -// until no more internal requests are inflight. 
-// Step contains metadata, and status for the -// internal infer request -struct Step { - Step( - size_t step_idx, const InferenceRequest::SequenceId& correlation_id, - uint32_t flags) - : correlation_id_(correlation_id), flags_(flags), response_flags_(0), - infer_status_(nullptr), step_idx_(step_idx) - { - } - - std::shared_ptr ctx_; - std::unique_ptr request_; - InferenceRequest::SequenceId correlation_id_; - uint32_t flags_; - - std::mutex output_mtx_; - // Different output map to avoid address conflict from different memory types - std::unordered_map> - cpu_output_map_; - std::unordered_map< - int64_t, std::unordered_map>> - gpu_output_map_; - std::set> updated_tensors_; - uint32_t response_flags_; - TRITONSERVER_Error* infer_status_; - - size_t step_idx_; -}; - -struct TensorData { - struct Metadata { - Metadata() = default; - Metadata( - std::unique_ptr&& data, size_t reference_count) - : data_(std::move(data)), remaining_reference_count_(reference_count), - parameter_override_(false) - { - } - Metadata( - std::unique_ptr&& data, size_t reference_count, - const InferenceRequest::SequenceId& correlation_id, uint32_t flags) - : data_(std::move(data)), remaining_reference_count_(reference_count), - parameter_override_(true), correlation_id_(correlation_id), - flags_(flags) - { - } - std::unique_ptr data_; - size_t remaining_reference_count_; - bool parameter_override_; - InferenceRequest::SequenceId correlation_id_; - uint32_t flags_; - }; - TensorData() = default; - TensorData(const size_t outgoing_steps_count) - : current_iteration_(0), outgoing_steps_count_(outgoing_steps_count), - batch_size_(0) - { - } - - IterationCount AddTensor(std::unique_ptr&& tensor) - { - tensor_.emplace( - current_iteration_, Metadata(std::move(tensor), outgoing_steps_count_)); - return current_iteration_++; - } - - IterationCount AddTensor( - std::unique_ptr&& tensor, - const InferenceRequest::SequenceId& correlation_id, uint32_t flags) - { - tensor_.emplace( - current_iteration_, - Metadata( - std::move(tensor), outgoing_steps_count_, correlation_id, flags)); - return current_iteration_++; - } - - // Tensors associated with the particular ensemble tensor. - // A container is used to handle the decoupled case - // where variable number of tensors will be produced. - // map 'iteration count' to pair of - std::unordered_map tensor_; - size_t current_iteration_; - size_t outgoing_steps_count_; - - // Ensemble may be configured to passing tensor between batching model and - // non-batching model as long as the full shapes match and storing the batch - // size of the generated tensor explicitly for checking and setting proper - // shape for the downstream model request. - size_t batch_size_; -}; - -// EnsembleContext maintains the state of the ensemble request -// -// Using static functions to take advantage of shared_ptr, a copy of the -// shared_ptr will be made when a step is scheduled and it will go out of -// scope after the step's callback is finished. The step's callback will -// schedule new steps if available and the last step will finish the ensemble -// request. -// So we don't have to maintian the context in scheduler as the shared_ptr -// will destroy the context for us if there are no "in-flight" steps. 
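The comment above describes the ownership scheme that keeps an ensemble context alive: each scheduled step captures a copy of the `shared_ptr`, so the context survives exactly as long as any step is still in flight and is destroyed automatically when the last step's callback returns. Below is a minimal sketch of that pattern, assuming simplified stand-in types rather than the real `EnsembleContext` and `Step`.

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Illustrative stand-ins; not the Triton types.
struct Context {
    int pending = 0;
    ~Context() { std::cout << "context destroyed\n"; }
};

struct Step {
    // Each scheduled step holds its own copy of the shared_ptr, which is what
    // keeps the context alive while the step is in flight.
    std::shared_ptr<Context> ctx;
};

// Completion callback: the Step (and its shared_ptr copy) is released here.
void OnStepComplete(std::unique_ptr<Step> step)
{
    if (--step->ctx->pending == 0) {
        std::cout << "last step finished\n";
    }
    // 'step' is destroyed on return; once the final copy of 'ctx' drops,
    // ~Context() runs without any scheduler-side bookkeeping.
}

int main()
{
    auto ctx = std::make_shared<Context>();

    std::vector<std::unique_ptr<Step>> inflight;
    for (int i = 0; i < 3; ++i) {
        inflight.push_back(std::unique_ptr<Step>(new Step{ctx}));
        ++ctx->pending;
    }
    ctx.reset();  // the dispatcher may drop its own reference right after scheduling

    for (auto& s : inflight) {
        OnStepComplete(std::move(s));
    }
    return 0;  // prints "last step finished" then "context destroyed"
}
```

Reference counting, rather than explicit tracking in the scheduler, is what decides when the context is torn down, which is the point the original comment makes about not having to maintain the context in the scheduler.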
-class EnsembleContext { - public: - EnsembleContext( - MetricModelReporter* metric_reporter, - InferenceStatsAggregator* stats_aggregator, InferenceServer* is, - EnsembleInfo* info, std::unique_ptr& request, - cudaStream_t stream); - - // Perform transition on 'context' state given the information of - // 'completed_step' - static void Proceed( - const std::shared_ptr& context, - const std::unique_ptr& completed_step = nullptr); - - private: - static TRITONSERVER_Error* ResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* allocated_memory_type, - int64_t* allocated_memory_type_id); - static TRITONSERVER_Error* ResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, - void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - static TRITONSERVER_Error* OutputBufferQuery( - TRITONSERVER_ResponseAllocator* allocator, void* userp, - const char* tensor_name, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id); - static void RequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, - void* userp); - static void ResponseComplete( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, - void* userp); - - using StepList = std::vector>; - using VersionMap = std::unordered_map>; - - // Helper function to reshape the given tensor according to the - // config shape and batching info and its actual shape and batching info. - // Note that 'dims' will be in full shape as opposed to 'config_dims'. - // Return the dims after reshape. - std::vector ReshapeTensorDims( - const triton::common::DimsList& config_dims, - const bool config_allow_batching, const size_t tensor_batch_size, - const std::vector& dims); - - // Return the list of step that becomes ready due to tensor update - // from 'completed_step' - Status PrepareSteps( - const std::unique_ptr& completed_step, StepList* steps); - - // Prepare infer stats and call the inference server's function to process - // the infer requests specified in 'steps' - static void ScheduleSteps( - const std::shared_ptr& context, StepList&& steps); - - // Helper function that updates ensemble state given 'completed_step' and - // returns the list of updated tensors in 'updated_tensors' - Status UpdateEnsembleState( - const std::unique_ptr& completed_step, - std::set>* updated_tensors); - - // Helper function that returns a list of 'steps' that should be run under - // current ensemble state. 'updated_tensors' is used so that we don't need to - // iterate all the tensors to determine which step can be run. - Status GetNextSteps( - const std::set>& updated_tensors, - StepList* steps); - - // Helper function that completes the response of the ensemble request - Status FinishEnsemble( - std::unique_ptr&& response = nullptr); - - // Helper function that initialize the 'step' given the info at 'step_idx'. - // The 'step' will have proper request / response provider for the model - Status InitStep( - const size_t step_idx, const IterationCount iteration_count, - std::unique_ptr* step); - - // Helper function that set the output of the ensemble request if it is ready - // and valid. 
- Status CheckAndSetEnsembleOutput( - const std::set>& updated_tensors, - std::unique_ptr* response); - - InferenceServer* is_; - - EnsembleInfo* info_; - - // All EnsembleContext will use the same CUDA stream managed by - // the ensemble scheduler - cudaStream_t stream_; - - // Mutex to avoid concurrent call on 'PrepareSteps' where ensemble state - // are being modified - std::mutex mutex_; - - size_t inflight_step_counter_; - - // pointer that either points to 'pruned_tensor_to_step_' or to - // 'info_->tensor_to_step_' if all ensemble outputs are requested - std::unordered_map>* tensor_to_step_; - - std::unordered_map> pruned_tensor_to_step_; - std::unordered_map tensor_data_; - - // Handle to all models that may be used in the ensemble - std::unordered_map handles_; - - // Request specific information that obtained from ensemble request and - // should be applied to all internal requests - uint32_t flags_; - std::string request_id_; - InferenceRequest::SequenceId correlation_id_; - uint32_t priority_; - uint64_t timeout_; - - // Objects related to the ensemble infer request - Status ensemble_status_; - RequestTracker* request_tracker_; - - // The allocator that will be used to allocate buffers for the - // inference result tensors. - std::unique_ptr< - TRITONSERVER_ResponseAllocator, - decltype(&TRITONSERVER_ResponseAllocatorDelete)> - allocator_; -}; - -EnsembleContext::EnsembleContext( - MetricModelReporter* metric_reporter, - InferenceStatsAggregator* stats_aggregator, InferenceServer* is, - EnsembleInfo* info, std::unique_ptr& request, - cudaStream_t stream) - : is_(is), info_(info), stream_(stream), inflight_step_counter_(0), - allocator_(nullptr, TRITONSERVER_ResponseAllocatorDelete) -{ - uint64_t compute_start_ns = 0; - INFER_STATS_SET_TIMESTAMP(compute_start_ns); - request_tracker_ = new RequestTracker( - std::move(request), compute_start_ns, metric_reporter, stats_aggregator); - - auto& lrequest = request_tracker_->Request(); - - // Obtain model handles of all models in ensemble request such that - // they have the same lifetime as the ensemble request to avoid unloading - // while the ensemble is executing. 
- for (const auto& step_info : info_->steps_) { - auto it = handles_.find(step_info.model_name_); - if (it == handles_.end()) { - it = handles_.emplace(std::make_pair(step_info.model_name_, VersionMap())) - .first; - } - auto ver_it = it->second.find(step_info.model_version_); - if (ver_it == it->second.end()) { - std::shared_ptr model = nullptr; - ensemble_status_ = is_->GetModel( - step_info.model_name_, step_info.model_version_, &model); - if (!ensemble_status_.IsOk()) { - break; - } - - it->second.emplace(std::make_pair(step_info.model_version_, model)); - } - } - - // Prune ensemble first if not all outputs are requested - std::set ignored_tensor; - for (const auto& ensemble_output : info_->ensemble_output_shape_) { - ignored_tensor.insert(ensemble_output.first); - } - for (const auto& requested_output : lrequest->ImmutableRequestedOutputs()) { - ignored_tensor.erase(requested_output); - } - if (ignored_tensor.empty()) { - tensor_to_step_ = &(info_->tensor_to_step_); - } else { - pruned_tensor_to_step_ = info_->tensor_to_step_; - tensor_to_step_ = &pruned_tensor_to_step_; - // Backward traversal - std::unordered_map step_requested_output_count; - while (!ignored_tensor.empty()) { - std::set new_ignored_tensor; - for (const auto& output : ignored_tensor) { - auto step_idx = info_->tensor_to_prev_step_[output]; - auto& step = info_->steps_[step_idx]; - auto it = step_requested_output_count.find(step_idx); - if (it == step_requested_output_count.end()) { - auto output_count = step.output_to_tensor_.size(); - it = - step_requested_output_count.emplace(step_idx, output_count).first; - } - // If none of the outputs of the step is requested, - // then the step can be pruned - if (--it->second == 0) { - for (const auto& input : step.input_to_tensor_) { - auto& step_set = pruned_tensor_to_step_[input.second]; - step_set.erase(step_idx); - // If all steps depend on a tensor are pruned, - // then the tensor can be ignored. - if (step_set.empty()) { - new_ignored_tensor.insert(input.second); - } - } - } - } - ignored_tensor.swap(new_ignored_tensor); - } - } - - for (const auto& pair : *tensor_to_step_) { - const auto& requested_outputs = lrequest->ImmutableRequestedOutputs(); - // For requested outputs, add 1 to outgoing count as the ensemble itself - // isn't counted as step. - if (requested_outputs.find(pair.first) != requested_outputs.end()) { - tensor_data_.emplace(pair.first, TensorData(pair.second.size() + 1)); - } else { - tensor_data_.emplace(pair.first, TensorData(pair.second.size())); - } - } - - if (ensemble_status_.IsOk()) { - request_id_ = lrequest->Id(); - correlation_id_ = lrequest->CorrelationId(); - flags_ = lrequest->Flags(); - priority_ = lrequest->Priority(); - timeout_ = lrequest->TimeoutMicroseconds(); - - for (const auto& pr : lrequest->ImmutableInputs()) { - const InferenceRequest::Input* input = pr.second; - auto it = tensor_data_.find(input->Name()); - if (it != tensor_data_.end()) { - auto& tensor_data = it->second; - // Shape() represents reshaped value without batch dimension, - // thus need to fill it if necessary. 
- std::unique_ptr tensor; - if (lrequest->BatchSize() != 0) { - std::vector shape{lrequest->BatchSize()}; - shape.insert( - shape.end(), input->Shape().begin(), input->Shape().end()); - tensor.reset(new InferenceRequest::Input( - input->Name(), input->DType(), shape)); - } else { - tensor.reset(new InferenceRequest::Input( - input->Name(), input->DType(), input->Shape())); - } - tensor->SetData(input->Data()); - for (const auto& host_policy_data : input->HostPolicyData()) { - tensor->SetData(host_policy_data.first, host_policy_data.second); - } - tensor_data.AddTensor(std::move(tensor)); - tensor_data.batch_size_ = lrequest->BatchSize(); - } else { - ensemble_status_ = Status( - Status::Code::INVALID_ARG, - lrequest->LogRequest() + "unexpected input '" + input->Name() + - "' in request header that does not map to any ensemble inputs"); - } - } - - // Iterate the ensemble optional inputs and add empty tensor data entry - // if the input is not provided - for (const auto& name : info_->optional_inputs_) { - auto it = tensor_data_.find(name); - if ((it != tensor_data_.end()) && it->second.tensor_.empty()) { - it->second.AddTensor(nullptr); - it->second.batch_size_ = lrequest->BatchSize(); - } - } - } - - TRITONSERVER_ResponseAllocator* allocator; - TRITONSERVER_Error* err = TRITONSERVER_ResponseAllocatorNew( - &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */); - if (err == nullptr) { - err = TRITONSERVER_ResponseAllocatorSetQueryFunction( - allocator, OutputBufferQuery); - } - if (err != nullptr) { - ensemble_status_ = Status( - TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)), - TRITONSERVER_ErrorMessage(err)); - TRITONSERVER_ErrorDelete(err); - } else { - allocator_.reset(allocator); - } -} - -TRITONSERVER_Error* -EnsembleContext::ResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* allocated_memory_type, - int64_t* allocated_memory_type_id) -{ - *buffer = nullptr; - *buffer_userp = nullptr; - - auto allocated_buffer = std::make_shared( - byte_size, preferred_memory_type, preferred_memory_type_id); - - auto mutable_buffer = allocated_buffer->MutableBuffer( - allocated_memory_type, allocated_memory_type_id); - if ((mutable_buffer != nullptr) || (byte_size == 0)) { - if (byte_size != 0) { - *buffer = static_cast(mutable_buffer); - auto step = reinterpret_cast(userp); - std::lock_guard lk(step->output_mtx_); - if (*allocated_memory_type == TRITONSERVER_MEMORY_GPU) { - step->gpu_output_map_[*allocated_memory_type_id].emplace( - reinterpret_cast(*buffer), std::move(allocated_buffer)); - } else { - step->cpu_output_map_.emplace( - reinterpret_cast(*buffer), std::move(allocated_buffer)); - } - } - LOG_VERBOSE(1) << "Internal response allocation: " << tensor_name - << ", size " << byte_size << ", addr " << *buffer - << ", memory type " << *allocated_memory_type << ", type id " - << *allocated_memory_type_id; - } - - return nullptr; // Success -} - -TRITONSERVER_Error* -EnsembleContext::ResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - LOG_VERBOSE(1) << "Internal response release: " - << "size " << byte_size << ", addr " << buffer; - - // Don't do anything when releasing a buffer since ResponseAlloc - // passes the ownership of the data to 
ensemble context. - return nullptr; // Success -} - -TRITONSERVER_Error* -EnsembleContext::OutputBufferQuery( - TRITONSERVER_ResponseAllocator* allocator, void* userp, - const char* tensor_name, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - // Ensemble will always attempt to satisfy any output buffer request - return nullptr; // Success -} - -void -EnsembleContext::RequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) -{ - if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) { - LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceRequestDelete(request), - "deleting ensemble inference request"); - auto request_tracker = reinterpret_cast(userp); - if (request_tracker->DecrementCounter()) { - delete request_tracker; - } - } -} - -void -EnsembleContext::ResponseComplete( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) -{ - auto step_ptr = std::unique_ptr(reinterpret_cast(userp)); - step_ptr->response_flags_ = flags; - - if (response != nullptr) { - auto err = TRITONSERVER_InferenceResponseError(response); - uint32_t count; - bool parameter_override = false; - InferenceRequest::SequenceId correlation_id{0}; - uint32_t flags = 0; - if (err == nullptr) { - err = TRITONSERVER_InferenceResponseParameterCount(response, &count); - if (err == nullptr) { - for (uint32_t idx = 0; idx < count; idx++) { - const char* name; - TRITONSERVER_ParameterType type; - const void* vvalue; - err = TRITONSERVER_InferenceResponseParameter( - response, idx, &name, &type, &vvalue); - if (err == nullptr) { - if (!strcmp(name, "sequence_id")) { - switch (type) { - case TRITONSERVER_PARAMETER_INT: - correlation_id = InferenceRequest::SequenceId( - *reinterpret_cast(vvalue)); - parameter_override = true; - break; - case TRITONSERVER_PARAMETER_STRING: - correlation_id = InferenceRequest::SequenceId(std::string( - *reinterpret_cast(vvalue))); - parameter_override = true; - break; - default: - err = TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "expected parameter 'sequence_id' to be " - "TRITONSERVER_PARAMETER_INT or " - "TRITONSERVER_PARAMETER_STRING"); - } - } else if (!strcmp(name, "sequence_start")) { - if (type != TRITONSERVER_PARAMETER_BOOL) { - err = TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "expect paremeter 'sequence_start' to be " - "TRITONSERVER_PARAMETER_BOOL"); - } else { - if (*reinterpret_cast(vvalue)) { - flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_START; - } - parameter_override = true; - } - } else if (!strcmp(name, "sequence_end")) { - if (type != TRITONSERVER_PARAMETER_BOOL) { - err = TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "expect paremeter 'sequence_end' to be " - "TRITONSERVER_PARAMETER_BOOL"); - } else { - if (*reinterpret_cast(vvalue)) { - flags |= TRITONSERVER_REQUEST_FLAG_SEQUENCE_END; - } - parameter_override = true; - } - } - } - } - } - } - if (err == nullptr) { - err = TRITONSERVER_InferenceResponseOutputCount(response, &count); - if (err == nullptr) { - std::lock_guard lock(step_ptr->ctx_->mutex_); - auto& output_to_tensor = - step_ptr->ctx_->info_->steps_[step_ptr->step_idx_] - .output_to_tensor_; - for (uint32_t idx = 0; idx < count; idx++) { - const char* name; - TRITONSERVER_DataType datatype; - const int64_t* shape; - uint64_t dim_count; - const void* base; - size_t byte_size; - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - void* userp; - err = TRITONSERVER_InferenceResponseOutput( - response, idx, &name, 
&datatype, &shape, &dim_count, &base, - &byte_size, &memory_type, &memory_type_id, &userp); - if (err == nullptr) { - auto it = output_to_tensor.find(name); - if (it != output_to_tensor.end()) { - std::unique_ptr tensor( - new InferenceRequest::Input( - it->second, TritonToDataType(datatype), shape, - dim_count)); - - if (byte_size != 0) { - std::lock_guard output_lk(step_ptr->output_mtx_); - if (memory_type == TRITONSERVER_MEMORY_GPU) { - auto& gpu_output_map = - step_ptr->gpu_output_map_[memory_type_id]; - auto it = - gpu_output_map.find(reinterpret_cast(base)); - tensor->SetData(std::move(it->second)); - gpu_output_map.erase(it); - } else { - auto it = step_ptr->cpu_output_map_.find( - reinterpret_cast(base)); - tensor->SetData(std::move(it->second)); - step_ptr->cpu_output_map_.erase(it); - } - } - - auto& tensor_data = step_ptr->ctx_->tensor_data_[it->second]; - if (parameter_override) { - step_ptr->updated_tensors_.emplace( - it->second, tensor_data.AddTensor( - std::move(tensor), correlation_id, flags)); - } else { - step_ptr->updated_tensors_.emplace( - it->second, - tensor_data.AddTensor( - std::move(tensor), step_ptr->correlation_id_, - step_ptr->flags_)); - } - } else { - LOG_VERBOSE(1) - << "in ensemble, an internal response header specified " - "output '" - << name << "' that does not map to any ensemble tensors"; - } - } - if (err != nullptr) { - break; - } - } - } - } - - if (err != nullptr) { - step_ptr->infer_status_ = err; - } - LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceResponseDelete(response), - "deleting inference response"); - } - - EnsembleContext::Proceed(step_ptr->ctx_, step_ptr); - // Expecting more responses - if ((flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL) == 0) { - step_ptr.release(); - } -} - -void -EnsembleContext::Proceed( - const std::shared_ptr& context, - const std::unique_ptr& completed_step) -{ - StepList ready_steps; - Status status = context->PrepareSteps(completed_step, &ready_steps); - if (status.IsOk()) { - ScheduleSteps(context, std::move(ready_steps)); - } -} - -Status -EnsembleContext::PrepareSteps( - const std::unique_ptr& completed_step, StepList* ready_steps) -{ - { - std::lock_guard lock(mutex_); - - // Initialization error, ensemble status will be not ok since the beginning - if (completed_step == nullptr && !ensemble_status_.IsOk()) { - ensemble_status_ = FinishEnsemble(); - } - - if (ensemble_status_.IsOk()) { - StepList res; - std::set> updated_tensors; - ensemble_status_ = UpdateEnsembleState(completed_step, &updated_tensors); - if (ensemble_status_.IsOk()) { - ensemble_status_ = GetNextSteps(updated_tensors, ready_steps); - } - - // Check and send ensemble response - if ((!ensemble_status_.IsOk()) || (inflight_step_counter_ == 0) || - info_->is_decoupled_) { - std::unique_ptr response; - if (ensemble_status_.IsOk()) { - ensemble_status_ = - CheckAndSetEnsembleOutput(updated_tensors, &response); - } - ensemble_status_ = FinishEnsemble(std::move(response)); - } - } - return ensemble_status_; - } -} - -Status -EnsembleContext::UpdateEnsembleState( - const std::unique_ptr& completed_step, - std::set>* updated_tensors) -{ - updated_tensors->clear(); - if (completed_step == nullptr) { - for (const auto& tensor_data : tensor_data_) { - if (!tensor_data.second.tensor_.empty()) { - updated_tensors->emplace(tensor_data.first, 0); - } - } - } else { - if (completed_step->response_flags_ & - TRITONSERVER_RESPONSE_COMPLETE_FINAL) { - inflight_step_counter_--; - } - RETURN_IF_TRITONSERVER_ERROR(completed_step->infer_status_); - 
updated_tensors->swap(completed_step->updated_tensors_); - } - return Status::Success; -} - -Status -EnsembleContext::GetNextSteps( - const std::set>& updated_tensors, - StepList* steps) -{ - steps->clear(); - - std::set> next_step_idx; - // Get steps whose tensors used for input are set - for (const auto updated_tensor : updated_tensors) { - const auto& step_idx = (*tensor_to_step_)[updated_tensor.first]; - for (const auto& idx : step_idx) { - bool ready = true; - for (const auto& input_pair : info_->steps_[idx].input_to_tensor_) { - auto& tensor = tensor_data_[input_pair.second].tensor_; - if (tensor.empty()) { - ready = false; - break; - } else { - // Check if other inputs have tensor with corresponding iteration - // count - if (tensor.find(updated_tensor.second) == tensor.end()) { - ready = false; - break; - } - } - } - if (ready) { - next_step_idx.emplace(idx, updated_tensor.second); - } - } - } - - for (const auto& idx : next_step_idx) { - steps->emplace_back(); - RETURN_IF_ERROR(InitStep(idx.first, idx.second, &(steps->back()))); - } - inflight_step_counter_ += steps->size(); - - return Status::Success; -} - -Status -EnsembleContext::InitStep( - const size_t step_idx, const IterationCount iteration_count, - std::unique_ptr* step) -{ - const auto& istep = info_->steps_[step_idx]; - auto& version_map = handles_[istep.model_name_]; - auto& model = version_map[istep.model_version_]; - - const bool allow_batching = (model->Config().max_batch_size() > 0); - - auto irequest = std::unique_ptr( - new InferenceRequest(model, istep.model_version_)); - - // Store the pointers to tensors used so that we can prune them afterward. - // Can't prune the tensor in the input loop below as it may be used by - // multiple inputs in the same step. - std::map releasing_tensors; - - // Set inputs in request, prepare input map, - // and set overridden parameter if any. - auto correlation_id = correlation_id_; - auto flags = flags_; - bool parameter_set = false; - for (const auto& pair : istep.input_to_tensor_) { - auto& tensor_data = tensor_data_[pair.second]; - auto& tensor = tensor_data.tensor_[iteration_count]; - - // nullptr if and only if the tensor is optional ensemble input and - // not provided in the ensemble request. In such case, we don't add - // the input and expect the ensemble pipeline is configured correctly - // (the input to the inner model is also optional) - if (tensor.data_ != nullptr) { - // If the actual shape and config shape agree with each other without - // considering batch size, non-batch / batch conversion are not required. - const inference::ModelInput* input_config; - model->GetInput(pair.first, &input_config); - auto shape = ReshapeTensorDims( - input_config->dims(), allow_batching, tensor_data.batch_size_, - tensor.data_->OriginalShape()); - - InferenceRequest::Input* input; - RETURN_IF_ERROR(irequest->AddOriginalInput( - pair.first, tensor.data_->DType(), shape, &input)); - RETURN_IF_ERROR(input->SetData(tensor.data_->Data())); - for (const auto& host_policy_data : tensor.data_->HostPolicyData()) { - RETURN_IF_ERROR( - input->SetData(host_policy_data.first, host_policy_data.second)); - } - } - - releasing_tensors.emplace(&tensor_data, &tensor.remaining_reference_count_); - - if (tensor.parameter_override_) { - if (parameter_set && ((correlation_id != tensor.correlation_id_) || - (flags != tensor.flags_))) { - LOG_ERROR << irequest->LogRequest() - << "Different set of response parameters are set for '" - << istep.model_name_ << "'. 
Parameter correlation ID " - << correlation_id << ", flags " << flags << " is used."; - continue; - } - correlation_id = tensor.correlation_id_; - flags = tensor.flags_; - parameter_set = true; - } - } - - // Prune the tensor if it is not needed by other steps - for (auto& releasing_pair : releasing_tensors) { - if ((--(*releasing_pair.second)) == 0) { - releasing_pair.first->tensor_.erase(iteration_count); - } - } - - // Set requested outputs in request header - for (const auto& pair : istep.output_to_tensor_) { - irequest->AddOriginalRequestedOutput(pair.first); - } - - step->reset(new Step(step_idx, correlation_id, flags)); - - irequest->SetId(request_id_); - irequest->SetCorrelationId(correlation_id); - irequest->SetFlags(flags); - irequest->SetPriority(priority_); - irequest->SetTimeoutMicroseconds(timeout_); -#ifdef TRITON_ENABLE_STATS - irequest->SetSecondaryStatsAggregator( - &request_tracker_->ContextStatsAggregator()); -#endif - irequest->SetResponseCallback( - reinterpret_cast(allocator_.get()), step->get(), - ResponseComplete, step->get()); - irequest->SetReleaseCallback(RequestComplete, request_tracker_); - - RETURN_IF_ERROR(irequest->PrepareForInference()); - -#ifdef TRITON_ENABLE_TRACING - auto& parent_trace = request_tracker_->Request()->Trace(); - if (parent_trace != nullptr) { - irequest->SetTrace(parent_trace->SpawnChildTrace()); - irequest->Trace()->SetModelName(irequest->ModelName()); - irequest->Trace()->SetModelVersion(irequest->ActualModelVersion()); - } -#endif - - // Record the batch size of output in advance as - // there is no other way to access it later on. - for (const auto& pair : istep.output_to_tensor_) { - auto& output_data_ = tensor_data_[pair.second]; - output_data_.batch_size_ = irequest->BatchSize(); - } - - (*step)->request_ = std::move(irequest); - - return Status::Success; -} - -std::vector -EnsembleContext::ReshapeTensorDims( - const triton::common::DimsList& config_dims, - const bool config_allow_batching, const size_t tensor_batch_size, - const std::vector& dims) -{ - bool reshaped = false; - std::vector res; - - // Only attempt to reshape if one setting is batchable while the other is not, - // the case of two mismatched batchable shapes is not considered. - // If the actual shape and config shape agree with each other without - // considering batch size, non-batch / batch conversion are not required. - if (config_allow_batching != (tensor_batch_size != 0)) { - // expect batching but the tensor is generated from nobatching model - if (config_allow_batching) { - if (triton::common::CompareDimsWithWildcard(config_dims, dims)) { - // If 'dims' already matches 'config_dims', prepend with batch size 1 - res.push_back(1); - res.insert(res.end(), dims.begin(), dims.end()); - reshaped = true; - } - // Otherwise, assuming the tensor is already in the batch expected - // by the model and do nothing - } else { - // Check if the batched tensor can be sent to the non-batching - // model as one tensor. 
If not, strip the batch dimension if - // it is batch size 1 - if (!triton::common::CompareDimsWithWildcard(config_dims, dims) && - (tensor_batch_size == 1)) { - res.assign(dims.begin() + 1, dims.end()); - reshaped = true; - } - } - } - - if (!reshaped) { - res = dims; - } - return res; -} - -Status -EnsembleContext::FinishEnsemble(std::unique_ptr&& response) -{ - // Do nothing if the ensemble is finished - if (request_tracker_ == nullptr) { - return ensemble_status_; - } - - // Add ensemble name to make error message more trackable - if (!ensemble_status_.IsOk()) { - ensemble_status_ = Status( - ensemble_status_.StatusCode(), "in ensemble '" + info_->ensemble_name_ + - "', " + ensemble_status_.Message()); - } - - if (ensemble_status_.IsOk()) { - if (info_->is_decoupled_) { - if (response != nullptr) { - InferenceResponse::Send(std::move(response), 0 /* flags */); - } - if (inflight_step_counter_ != 0) { - return ensemble_status_; - } - request_tracker_->Request()->ResponseFactory()->SendFlags( - TRITONSERVER_RESPONSE_COMPLETE_FINAL); - } else { - InferenceResponse::Send( - std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL); - } - } else { - if (response != nullptr) { - InferenceResponse::SendWithStatus( - std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL, - ensemble_status_); - } else { - InferenceRequest::RespondIfError( - request_tracker_->Request(), ensemble_status_); - } - } - - // Reach here when the ensemble execution comes to the end, 'ensemble_status_' - // at this point is representative. - request_tracker_->SetStatus(ensemble_status_); - if (request_tracker_->DecrementCounter()) { - delete request_tracker_; - } - request_tracker_ = nullptr; - return ensemble_status_; -} - -Status -EnsembleContext::CheckAndSetEnsembleOutput( - const std::set>& updated_tensors, - std::unique_ptr* response) -{ - IterationCount iteration_count = 0; - // Check if updated tensor is one of the ensemble output and if all outputs - // have tensor of the same iteration count - bool ready = false; - auto& lrequest = request_tracker_->Request(); - const auto& requested_outputs = lrequest->ImmutableRequestedOutputs(); - for (const auto updated_tensor : updated_tensors) { - if (requested_outputs.find(updated_tensor.first) == - requested_outputs.end()) { - continue; - } - - ready = true; - iteration_count = updated_tensor.second; - for (const auto& output : requested_outputs) { - auto& tensor = tensor_data_[output].tensor_; - if (tensor.empty()) { - ready = false; - break; - } else { - // Check if other outputs have tensor with corresponding iteration count - if (tensor.find(iteration_count) == tensor.end()) { - ready = false; - break; - } - } - } - } - if (!ready) { - if (info_->is_decoupled_) { - return Status::Success; - } - return Status( - Status::Code::INVALID_ARG, - lrequest->LogRequest() + - "unexpected deadlock, at least one output is not set while no more " - "ensemble steps can be made"); - } - - RETURN_IF_ERROR(lrequest->ResponseFactory()->CreateResponse(response)); - - bool cuda_async_copy = false; - std::map releasing_tensors; - for (const auto& output_pair : info_->ensemble_output_shape_) { - if (requested_outputs.find(output_pair.first) == requested_outputs.end()) { - continue; - } - // Check if output is ready - auto& tensor_data = tensor_data_[output_pair.first]; - auto& tensor = tensor_data.tensor_[iteration_count]; - - auto shape = ReshapeTensorDims( - output_pair.second, (lrequest->BatchSize() != 0), - tensor_data.batch_size_, tensor.data_->OriginalShape()); - - 
InferenceResponse::Output* output; - RETURN_IF_ERROR((*response)->AddOutput( - output_pair.first, tensor.data_->DType(), shape, &output)); - - // Use the memory type of the memory block as preferred memory type - TRITONSERVER_MemoryType dst_memory_type; - int64_t dst_memory_type_id; - size_t content_size; - tensor.data_->Data()->BufferAt( - 0, &content_size, &dst_memory_type, &dst_memory_type_id); - - void* buffer; - RETURN_IF_ERROR(output->AllocateDataBuffer( - &buffer, content_size, &dst_memory_type, &dst_memory_type_id)); - - // Done with this output if 'expected_byte_size' is 0 - if (content_size == 0) { - continue; - } else if (buffer == nullptr) { - return Status( - Status::Code::INTERNAL, - "failed to allocate buffer for output '" + output_pair.first + "'"); - } - - size_t content_offset = 0; - size_t content_idx = 0; - TRITONSERVER_MemoryType src_memory_type; - int64_t src_memory_type_id; - - const char* content = tensor.data_->Data()->BufferAt( - content_idx, &content_size, &src_memory_type, &src_memory_type_id); - bool cuda_used = false; - while (content != nullptr) { - RETURN_IF_ERROR(CopyBuffer( - output_pair.first, src_memory_type, src_memory_type_id, - dst_memory_type, dst_memory_type_id, content_size, content, - ((char*)buffer) + content_offset, stream_, &cuda_used)); - cuda_async_copy |= cuda_used; - - content_offset += content_size; - content_idx++; - content = tensor.data_->Data()->BufferAt( - content_idx, &content_size, &src_memory_type, &src_memory_type_id); - } - - releasing_tensors.emplace(&tensor_data, &tensor.remaining_reference_count_); - - if (tensor.parameter_override_) { - switch (lrequest->CorrelationId().Type()) { - case InferenceRequest::SequenceId::DataType::STRING: - (*response)->AddParameter( - "sequence_id", tensor.correlation_id_.StringValue().c_str()); - break; - case InferenceRequest::SequenceId::DataType::UINT64: - (*response)->AddParameter( - "sequence_id", - (int64_t)tensor.correlation_id_.UnsignedIntValue()); - break; - default: - (*response)->AddParameter( - "sequence_id", - (int64_t)tensor.correlation_id_.UnsignedIntValue()); - break; - } - (*response)->AddParameter( - "sequence_start", - (tensor.flags_ & TRITONSERVER_REQUEST_FLAG_SEQUENCE_START) != 0); - (*response)->AddParameter( - "sequence_end", - (tensor.flags_ & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END) != 0); - } - } - - if (cuda_async_copy) { -#ifdef TRITON_ENABLE_GPU - cudaStreamSynchronize(stream_); -#else - return Status( - Status::Code::INTERNAL, - "unexpected CUDA copy flag set while GPU is not supported"); -#endif // TRITON_ENABLE_GPU - } - - // Prune the tensor if it is not needed by other steps - for (auto& releasing_pair : releasing_tensors) { - if ((--(*releasing_pair.second)) == 0) { - releasing_pair.first->tensor_.erase(iteration_count); - } - } - - return Status::Success; -} - -void -EnsembleContext::ScheduleSteps( - const std::shared_ptr& context, StepList&& steps) -{ - for (auto& step : steps) { - step->ctx_ = context; - bool should_schedule = false; - // Must release lock before InferAsync to avoid deadlock, as the same thread - // will be calling request/response callbacks on cache hits, which will - // attempt to acquire the lock already held - { - std::lock_guard lock(context->mutex_); - - // Need to check the ensemble_status_ to ensure the FinishEnsemble() - // is called only once. 
- if (context->ensemble_status_.IsOk()) { - context->request_tracker_->IncrementCounter(); - should_schedule = true; - } - } - if (should_schedule) { - // On a successful call to InferAsync(), the step will be released by - // the response callback. When the response callback is invoked, the - // step must not own (and release) the request as the request should be - // transferred and managed by Triton core. In the case of cache hit, the - // request hasn't been transferred and can cause double-free, so moving - // the request ownership out of step here to avoid that - std::unique_ptr request = std::move(step->request_); - auto step_status = context->is_->InferAsync(request); - if (!step_status.IsOk()) { - std::lock_guard lock(context->mutex_); - context->ensemble_status_ = step_status; - // The request is not sent to server properly, shouldn't expect its - // release function get called. - context->request_tracker_->DecrementCounter(); - context->ensemble_status_ = context->FinishEnsemble(); - break; - } - } - step.release(); - } -} - -} // namespace - -Status -EnsembleScheduler::Create( - InferenceStatsAggregator* const stats_aggregator, - InferenceServer* const server, const inference::ModelConfig& config, - std::unique_ptr* scheduler) -{ - scheduler->reset(new EnsembleScheduler(stats_aggregator, server, config)); - return Status::Success; -} - -Status -EnsembleScheduler::Enqueue(std::unique_ptr& request) -{ - // Queue timer starts at the beginning of the queueing and - // scheduling process - request->CaptureQueueStartNs(); - INFER_TRACE_ACTIVITY( - request->Trace(), TRITONSERVER_TRACE_QUEUE_START, - request->QueueStartNs()); -#ifdef TRITON_ENABLE_TRACING - request->TraceInputTensors( - TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT, "EnsembleScheduler Enqueue"); -#endif // TRITON_ENABLE_TRACING - - // Add additional callback to keep track of in-flight count - ++inflight_count_; - request->AddInternalReleaseCallback([this]() { --inflight_count_; }); - std::shared_ptr context(new EnsembleContext( - metric_reporter_.get(), stats_aggregator_, is_, info_.get(), request, - stream_)); - EnsembleContext::Proceed(context); - return Status::Success; -} - -EnsembleScheduler::EnsembleScheduler( - InferenceStatsAggregator* const stats_aggregator, - InferenceServer* const server, const inference::ModelConfig& config) - : stats_aggregator_(stats_aggregator), is_(server), stream_(nullptr), - inflight_count_(0) -{ -#ifdef TRITON_ENABLE_GPU - // create CUDA stream - auto cuerr = cudaStreamCreate(&stream_); - if (cuerr != cudaSuccess) { - stream_ = nullptr; - LOG_ERROR << "unable to create stream for " << config.name() << ": " - << cudaGetErrorString(cuerr); - } -#endif // TRITON_ENABLE_GPU - -#ifdef TRITON_ENABLE_METRICS - if (Metrics::Enabled()) { - MetricModelReporter::Create( - config.name(), 1, METRIC_REPORTER_ID_CPU, config.metric_tags(), - &metric_reporter_); - } -#endif // TRITON_ENABLE_METRICS - - // Set 'info_' based on 'config' - info_.reset(new EnsembleInfo()); - - info_->ensemble_name_ = config.name(); - - // This config field is filled internally for ensemble models - info_->is_decoupled_ = config.model_transaction_policy().decoupled(); - - for (const auto& input : config.input()) { - info_->tensor_to_step_.emplace(input.name(), std::set()); - if (input.optional()) { - info_->optional_inputs_.emplace(input.name()); - } - } - for (const auto& output : config.output()) { - info_->tensor_to_step_.emplace(output.name(), std::set()); - - if (output.has_reshape()) { - 
info_->ensemble_output_shape_[output.name()] = output.reshape().shape(); - } else { - info_->ensemble_output_shape_[output.name()] = output.dims(); - } - } - - for (const auto& element : config.ensemble_scheduling().step()) { - size_t step_idx = info_->steps_.size(); - info_->steps_.emplace_back(element.model_name(), element.model_version()); - for (const auto& pair : element.input_map()) { - auto it = info_->tensor_to_step_.find(pair.second); - if (it == info_->tensor_to_step_.end()) { - it = info_->tensor_to_step_.emplace(pair.second, std::set()) - .first; - } - it->second.insert(step_idx); - info_->steps_[step_idx].input_to_tensor_.emplace( - std::make_pair(pair.first, pair.second)); - } - - for (const auto& pair : element.output_map()) { - auto it = info_->tensor_to_step_.find(pair.second); - if (it == info_->tensor_to_step_.end()) { - it = info_->tensor_to_step_.emplace(pair.second, std::set()) - .first; - } - info_->steps_[step_idx].output_to_tensor_.emplace( - std::make_pair(pair.first, pair.second)); - - info_->tensor_to_prev_step_.emplace(pair.second, step_idx); - } - } -} - -EnsembleScheduler::~EnsembleScheduler() -{ -#ifdef TRITON_ENABLE_GPU - if (stream_ != nullptr) { - cudaError_t err = cudaStreamDestroy(stream_); - if (err != cudaSuccess) { - LOG_ERROR << "Failed to destroy cuda stream: " << cudaGetErrorString(err); - } - } -#endif // TRITON_ENABLE_GPU -} - -}} // namespace triton::core - -#endif // TRITON_ENABLE_ENSEMBLE diff --git a/3rdparty/core-r22.12/src/ensemble_scheduler.h b/3rdparty/core-r22.12/src/ensemble_scheduler.h deleted file mode 100644 index 0305982a7c306b7020305c3743c05050c43a18cf..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/ensemble_scheduler.h +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
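The `EnsembleScheduler` constructor shown above turns the step list from the model config into two lookups: ensemble tensor name to the set of step indices that consume it (`tensor_to_step_`) and tensor name to the step index that produces it (`tensor_to_prev_step_`, used later for the backward pruning pass). The sketch below shows that inversion over plain containers, with a hypothetical two-step pipeline standing in for the real config protobuf.

```cpp
#include <cstddef>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Hypothetical, simplified stand-in for one ensemble step from the config.
struct StepConfig {
    std::string                        model_name;
    std::map<std::string, std::string> input_map;   // model input  -> ensemble tensor
    std::map<std::string, std::string> output_map;  // model output -> ensemble tensor
};

int main()
{
    std::vector<StepConfig> steps = {
        {"preprocess", {{"RAW", "ensemble_in"}}, {{"OUT", "pixels"}}},
        {"classifier", {{"IMAGE", "pixels"}}, {{"PROB", "ensemble_out"}}},
    };

    // tensor -> steps that consume it as an input (forward edges)
    std::map<std::string, std::set<size_t>> tensor_to_step;
    // tensor -> step that produced it (backward edge, used for pruning)
    std::map<std::string, size_t> tensor_to_prev_step;

    for (size_t idx = 0; idx < steps.size(); ++idx) {
        for (const auto& in : steps[idx].input_map) {
            tensor_to_step[in.second].insert(idx);
        }
        for (const auto& out : steps[idx].output_map) {
            tensor_to_prev_step[out.second] = idx;
        }
    }

    std::cout << "'pixels' consumed by step " << *tensor_to_step["pixels"].begin()
              << ", produced by step " << tensor_to_prev_step["pixels"] << "\n";  // 1 and 0
    return 0;
}
```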
-#pragma once - -#ifdef TRITON_ENABLE_ENSEMBLE - -#include -#include "metric_model_reporter.h" -#include "model_config.pb.h" -#include "model_config_utils.h" -#include "scheduler.h" -#include "status.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace core { - -#ifndef TRITON_ENABLE_GPU -using cudaStream_t = void*; -#endif // TRITON_ENABLE_GPU - -class InferenceServer; - -struct EnsembleInfo { - struct StepInfo { - StepInfo(const std::string& model_name, const int64_t model_version) - : model_name_(model_name), model_version_(model_version) - { - } - - std::string model_name_; - int64_t model_version_; - std::unordered_map input_to_tensor_; - std::unordered_map output_to_tensor_; - }; - - std::string ensemble_name_; - - bool is_decoupled_; - - // the ensemble output (re)shape expected by the ensemble - std::unordered_map - ensemble_output_shape_; - - // Inputs that is marked optional for the ensemble - std::set optional_inputs_; - - std::vector steps_; - - // Only include a step if the ensemble tensor is used as input in that step - std::unordered_map> tensor_to_step_; - - // backward path, ensemble tensor to the step that provides its data - std::unordered_map tensor_to_prev_step_; -}; - -// Scheduler that implements ensemble scheduling. -class EnsembleScheduler : public Scheduler { - public: - // Create a scheduler to process ensemble requests and - // to dispatch requests to models in ensemble internally. - static Status Create( - InferenceStatsAggregator* const stats_aggregator, - InferenceServer* const server, const inference::ModelConfig& config, - std::unique_ptr* scheduler); - - ~EnsembleScheduler(); - - // \see Scheduler::Enqueue() - Status Enqueue(std::unique_ptr& request) override; - - // \see Scheduler::InflightInferenceCount() - size_t InflightInferenceCount() override { return inflight_count_; } - - // \see Scheduler::Stop() - void Stop() override {} - - private: - EnsembleScheduler( - InferenceStatsAggregator* const stats_aggregator, - InferenceServer* const server, const inference::ModelConfig& config); - - std::shared_ptr metric_reporter_; - InferenceStatsAggregator* const stats_aggregator_; - InferenceServer* const is_; - - // Ensemble information that is built from model config - std::unique_ptr info_; - - // The stream used for data transfer. - cudaStream_t stream_; - - std::atomic inflight_count_; -}; - -}} // namespace triton::core - -#endif // TRITON_ENABLE_ENSEMBLE diff --git a/3rdparty/core-r22.12/src/ensemble_utils.cc b/3rdparty/core-r22.12/src/ensemble_utils.cc deleted file mode 100644 index dc4b6c5e853fdbf9e98c902cae59e1572b171860..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/ensemble_utils.cc +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifdef TRITON_ENABLE_ENSEMBLE - -#include "ensemble_utils.h" - -#include -#include "constants.h" -#include "model.h" -#include "model_config_utils.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -namespace { - -/// A basic unit in ensemble graph that records the data type and shape -/// of the ensemble tensor and which model they are inferred from. -struct TensorNode { - TensorNode( - const std::string& model_name, const bool batching, - const inference::DataType& type, const triton::common::DimsList& dims) - : model_name_(model_name), type_(type), dims_(dims), is_decoupled_(false), - decouple_label_(0), visited_(false) - { - // Expand dims to full shape, which includes batch dimension if exist - if (batching) { - full_dims_.Add(-1); - } - full_dims_.MergeFrom(dims_); - } - - // Constructor for symbolic nodes - TensorNode(const std::string& model_name) - : model_name_(model_name), is_decoupled_(false), decouple_label_(0), - visited_(false) - { - } - - std::string model_name_; - inference::DataType type_; - triton::common::DimsList dims_; - triton::common::DimsList full_dims_; - bool is_decoupled_; - size_t decouple_label_; - bool visited_; - std::vector prev_nodes_; - std::vector next_nodes_; - // A symbolic node to keep track of the decouple label of nodes that - // are outputs of the same step. - std::shared_ptr sibling_node_; -}; - -/// Validate if the data type and the shape of two TensorNode object are -/// consistent. -/// \param lhs One of the TensorNode object to be validated. -/// \param rhs Another TensorNode object to be validated. -/// \param message Extra message included in the front of error message -/// if error status is non-OK. -/// \return The error status. A non-OK status indicates the TensorNode objects -/// are not consistent. -Status -ValidateTensorConsistency( - const TensorNode& lhs, const TensorNode& rhs, const std::string& message) -{ - if (lhs.type_ != rhs.type_) { - return Status( - Status::Code::INVALID_ARG, - message + "inconsistent data type: " + - inference::DataType_Name(lhs.type_) + " is inferred from model " + - lhs.model_name_ + " while " + inference::DataType_Name(rhs.type_) + - " is inferred from model " + rhs.model_name_); - } - - // Shapes must match or either one uses variable size shape, if one uses - // variable size shape, shape consistency will be checked at runtime. - // If dims mismatch, compare agian with full dims in case the tensor is - // used for both non-batching model and batching model. 
In that case, it - // is acceptable if non-batching model shape is [-1, d_0, d_1, ..., d_n] - // while the batching model shape is [d_0, d_1, ..., d_n]. - if (!triton::common::CompareDimsWithWildcard(lhs.dims_, rhs.dims_) && - !triton::common::CompareDimsWithWildcard( - lhs.full_dims_, rhs.full_dims_)) { - return Status( - Status::Code::INVALID_ARG, - message + "inconsistent shape: " + - triton::common::DimsListToString(lhs.full_dims_) + - " is inferred from model " + lhs.model_name_ + " while " + - triton::common::DimsListToString(rhs.full_dims_) + - " is inferred from model " + rhs.model_name_); - } - - return Status::Success; -} - -Status -ValidateTensorMapping( - const std::string& ensemble, const inference::ModelEnsembling::Step& step, - const inference::ModelConfig& model_config, - std::unordered_map* ensemble_tensors) -{ - const bool batching = (model_config.max_batch_size() > 0); - // Check all inputs are mapped and no mapping to invalid inputs - std::set input_names; - for (const auto& model_input : model_config.input()) { - input_names.insert(model_input.name()); - } - for (const auto& input_map : step.input_map()) { - if (input_names.find(input_map.first) == input_names.end()) { - return Status( - Status::Code::INVALID_ARG, - "in ensemble " + ensemble + ", ensemble tensor " + input_map.second + - " is mapping to non-existing input " + input_map.first + - " in model " + step.model_name()); - } - } - for (const auto& model_input : model_config.input()) { - size_t mapped_cnt = 0; - for (const auto& input_map : step.input_map()) { - if (model_input.name() == input_map.first) { - TensorNode model_tensor( - step.model_name(), batching, model_input.data_type(), - model_input.dims()); - auto it = ensemble_tensors->find(input_map.second); - if (it != ensemble_tensors->end()) { - RETURN_IF_ERROR(ValidateTensorConsistency( - it->second, model_tensor, - "in ensemble " + ensemble + ", ensemble tensor " + - input_map.second + ": ")); - } else { - ensemble_tensors->emplace( - std::make_pair(input_map.second, model_tensor)); - } - mapped_cnt++; - } - } - if (mapped_cnt == 0) { - // Allow the input to be excluded from ensemble if it is optional - if (model_input.optional()) { - continue; - } - return Status( - Status::Code::INVALID_ARG, - "in ensemble " + ensemble + ", input " + model_input.name() + - " in model " + model_config.name() + - " is not mapped to any ensemble tensors"); - } else if (mapped_cnt > 1) { - return Status( - Status::Code::INVALID_ARG, - "in ensemble " + ensemble + ", input " + model_input.name() + - " in model " + model_config.name() + - " is mapped to multiple ensemble tensors"); - } - } - - // Check no multiple mappings to same ensemble tensor - // and no mapping from invalid outputs - std::set output_names; - for (const auto& model_output : model_config.output()) { - output_names.insert(model_output.name()); - } - for (const auto& output_map : step.output_map()) { - if (output_names.find(output_map.first) == output_names.end()) { - return Status( - Status::Code::INVALID_ARG, - "in ensemble " + ensemble + ", ensemble tensor " + output_map.second + - " is mapped from non-existing output " + output_map.first + - " in model " + step.model_name()); - } - } - std::shared_ptr sibling_node(new TensorNode(step.model_name())); - for (const auto& output_map : step.output_map()) { - size_t mapped_cnt = 0; - for (const auto& model_output : model_config.output()) { - if (model_output.name() == output_map.first) { - TensorNode model_tensor( - step.model_name(), batching, 
model_output.data_type(), - model_output.dims()); - auto it = ensemble_tensors->find(output_map.second); - if (it != ensemble_tensors->end()) { - RETURN_IF_ERROR(ValidateTensorConsistency( - it->second, model_tensor, - "in ensemble " + ensemble + ", ensemble tensor " + - output_map.second + ": ")); - } else { - it = ensemble_tensors - ->emplace(std::make_pair(output_map.second, model_tensor)) - .first; - } - it->second.sibling_node_ = sibling_node; - mapped_cnt++; - } - } - if (mapped_cnt > 1) { - return Status( - Status::Code::INVALID_ARG, - "in ensemble " + ensemble + ", multiple outputs in model " + - model_config.name() + " are mapped to the same ensemble tensor " + - output_map.second); - } - } - - // link ensemble tensors - bool is_decoupled = model_config.model_transaction_policy().decoupled(); - for (const auto& output_map : step.output_map()) { - auto& node = ensemble_tensors->find(output_map.second)->second; - node.is_decoupled_ = is_decoupled; - for (const auto& input_map : step.input_map()) { - auto& prev_node = ensemble_tensors->find(input_map.second)->second; - node.prev_nodes_.push_back(&prev_node); - prev_node.next_nodes_.push_back(&node); - } - } - return Status::Success; -} - -} // namespace - -Status -ValidateEnsembleConfig( - ModelRepositoryManager* model_repository_manager, - ModelRepositoryManager::DependencyNode* ensemble) -{ - const auto& ensemble_config = ensemble->model_config_; - if (!ensemble_config.has_ensemble_scheduling()) { - return Status::Success; - } - - const auto& ensemble_name = ensemble->model_name_; - const bool batching = (ensemble_config.max_batch_size() > 0); - std::unordered_map ensemble_tensors; - for (const auto& input : ensemble_config.input()) { - const auto& dims = - input.has_reshape() ? input.reshape().shape() : input.dims(); - TensorNode input_node(ensemble_name, batching, input.data_type(), dims); - ensemble_tensors.emplace(std::make_pair(input.name(), input_node)); - } - - TensorNode sink_node(ensemble_name); - for (const auto& output : ensemble_config.output()) { - const auto& dims = - output.has_reshape() ? 
output.reshape().shape() : output.dims(); - TensorNode output_node(ensemble_name, batching, output.data_type(), dims); - auto it = - ensemble_tensors.emplace(std::make_pair(output.name(), output_node)) - .first; - sink_node.prev_nodes_.emplace_back(&(it->second)); - it->second.next_nodes_.emplace_back(&sink_node); - } - - for (const auto& step : ensemble_config.ensemble_scheduling().step()) { - const auto& model_name = step.model_name(); - inference::ModelConfig model_config; - for (auto& node : ensemble->upstreams_) { - if (model_name == node.first->model_name_) { - // Obtain completed config from model instance - std::shared_ptr model; - RETURN_IF_ERROR( - model_repository_manager->GetModel(model_name, -1, &model)); - model_config = model->Config(); - break; - } - } - - // batchable ensemble can include non-batchable models as long as - // the expanded shapes are consistent - if ((model_config.max_batch_size() != 0) && - (model_config.max_batch_size() < ensemble_config.max_batch_size())) { - return Status( - Status::Code::INVALID_ARG, - "ensemble " + ensemble_name + " allows maximum batch size " + - std::to_string(ensemble_config.max_batch_size()) + - ", but it contains model " + model_name + - " which only allows maximum batch size to be " + - std::to_string(model_config.max_batch_size())); - } - - RETURN_IF_ERROR(ValidateTensorMapping( - ensemble_name, step, model_config, &ensemble_tensors)); - } - - // Visit nodes and validate decoupled workflow if any - // check data flow - size_t decouple_label = 0; - std::deque current_iterators; - for (const auto& input : ensemble_config.input()) { - auto it = ensemble_tensors.find(input.name()); - it->second.visited_ = true; - current_iterators.push_back(&(it->second)); - } - while (!current_iterators.empty()) { - auto& current_node = current_iterators.front(); - for (auto& next_node : current_node->next_nodes_) { - if (next_node->visited_) { - continue; - } - bool next_node_ready = true; - for (auto& prev_node : next_node->prev_nodes_) { - if (!prev_node->visited_) { - next_node_ready = false; - break; - } - } - if (next_node_ready) { - size_t prev_decouple_label = next_node->prev_nodes_[0]->decouple_label_; - for (auto& prev_node : next_node->prev_nodes_) { - if (prev_node->decouple_label_ != prev_decouple_label) { - return Status( - Status::Code::INVALID_ARG, - "in ensemble " + ensemble_name + ", step of model '" + - next_node->model_name_ + - "' receives inputs originated from different decoupled " - "models"); - } - } - if (next_node->sibling_node_ != nullptr) { - if (next_node->sibling_node_->visited_) { - next_node->decouple_label_ = - next_node->sibling_node_->decouple_label_; - } else { - next_node->decouple_label_ = next_node->is_decoupled_ - ? ++decouple_label - : prev_decouple_label; - next_node->sibling_node_->decouple_label_ = - next_node->decouple_label_; - next_node->sibling_node_->visited_ = true; - } - } else { - next_node->decouple_label_ = - next_node->is_decoupled_ ? 
++decouple_label : prev_decouple_label; - } - next_node->visited_ = true; - current_iterators.push_back(next_node); - } - } - current_iterators.pop_front(); - } - ensemble->model_config_.mutable_model_transaction_policy()->set_decoupled( - decouple_label != 0); - - return Status::Success; -} - -}} // namespace triton::core - -#endif // TRITON_ENABLE_ENSEMBLE diff --git a/3rdparty/core-r22.12/src/ensemble_utils.h b/3rdparty/core-r22.12/src/ensemble_utils.h deleted file mode 100644 index 63a9afa85eba94fd2fc8be40bc0a0edac556011b..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/ensemble_utils.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#ifdef TRITON_ENABLE_ENSEMBLE - -#include -#include -#include "model_config.pb.h" -#include "model_repository_manager.h" -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -/// Validate that the ensemble are specified correctly. Assuming that the -/// inputs and outputs specified in depending model configurations are accurate. -/// \param model_repository_manager The model manager to acquire model config. -/// \param ensemble The ensemble to be validated. -/// \return The error status. -Status ValidateEnsembleConfig( - ModelRepositoryManager* model_repository_manager, - ModelRepositoryManager::DependencyNode* ensemble); - -}} // namespace triton::core - -#endif // TRITON_ENABLE_ENSEMBLE diff --git a/3rdparty/core-r22.12/src/filesystem.cc b/3rdparty/core-r22.12/src/filesystem.cc deleted file mode 100644 index e0e2f98e70735635ce6aa80d5d3e6a62f8d2ccb4..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/filesystem.cc +++ /dev/null @@ -1,2662 +0,0 @@ -// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "filesystem.h" - -#ifdef _WIN32 -// suppress the min and max definitions in Windef.h. -#define NOMINMAX -#include - -// _CRT_INTERNAL_NONSTDC_NAMES 1 before including Microsoft provided C Runtime -// library to expose declarations without "_" prefix to match POSIX style. -#define _CRT_INTERNAL_NONSTDC_NAMES 1 -#include -#include -#else -#include -#include -#endif - -#ifdef TRITON_ENABLE_GCS -#include -#endif // TRITON_ENABLE_GCS - -#ifdef TRITON_ENABLE_S3 -#include -#include -#include -#include -#include -#include -#include -#endif // TRITON_ENABLE_S3 - -#ifdef TRITON_ENABLE_AZURE_STORAGE -#include -#include -#include -#undef LOG_INFO -#undef LOG_WARNING -#endif // TRITON_ENABLE_AZURE_STORAGE - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "constants.h" -#include "status.h" -#include "triton/common/logging.h" - -#define TRITONJSON_STATUSTYPE triton::core::Status -#define TRITONJSON_STATUSRETURN(M) \ - return triton::core::Status(triton::core::Status::Code::INTERNAL, (M)) -#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success -#include "triton/common/triton_json.h" - -#ifdef _WIN32 -// in Windows doesn't define S_ISDIR macro -#if !defined(S_ISDIR) && defined(S_IFMT) && defined(S_IFDIR) -#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) -#endif -#define F_OK 0 -#endif - -namespace triton { namespace core { - -namespace { - -// Check if a local path is a directory. We need to use this in LocalFileSystem -// and LocalizedPath so have this common function. -Status -IsPathDirectory(const std::string& path, bool* is_dir) -{ - *is_dir = false; - - struct stat st; - if (stat(path.c_str(), &st) != 0) { - return Status(Status::Code::INTERNAL, "failed to stat file " + path); - } - - *is_dir = S_ISDIR(st.st_mode); - return Status::Success; -} - -} // namespace - -LocalizedPath::~LocalizedPath() -{ - if (!local_path_.empty()) { - bool is_dir = true; - IsDirectory(local_path_, &is_dir); - LOG_STATUS_ERROR( - DeletePath(is_dir ? 
local_path_ : DirName(local_path_)), - "failed to delete localized path"); - } -} - -namespace { - -class FileSystem { - public: - virtual Status FileExists(const std::string& path, bool* exists) = 0; - virtual Status IsDirectory(const std::string& path, bool* is_dir) = 0; - virtual Status FileModificationTime( - const std::string& path, int64_t* mtime_ns) = 0; - virtual Status GetDirectoryContents( - const std::string& path, std::set* contents) = 0; - virtual Status GetDirectorySubdirs( - const std::string& path, std::set* subdirs) = 0; - virtual Status GetDirectoryFiles( - const std::string& path, std::set* files) = 0; - virtual Status ReadTextFile( - const std::string& path, std::string* contents) = 0; - virtual Status LocalizePath( - const std::string& path, std::shared_ptr* localized) = 0; - virtual Status WriteTextFile( - const std::string& path, const std::string& contents) = 0; - virtual Status WriteBinaryFile( - const std::string& path, const char* contents, - const size_t content_len) = 0; - virtual Status MakeDirectory( - const std::string& dir, const bool recursive) = 0; - virtual Status MakeTemporaryDirectory(std::string* temp_dir) = 0; - virtual Status DeletePath(const std::string& path) = 0; -}; - -class LocalFileSystem : public FileSystem { - public: - Status FileExists(const std::string& path, bool* exists) override; - Status IsDirectory(const std::string& path, bool* is_dir) override; - Status FileModificationTime( - const std::string& path, int64_t* mtime_ns) override; - Status GetDirectoryContents( - const std::string& path, std::set* contents) override; - Status GetDirectorySubdirs( - const std::string& path, std::set* subdirs) override; - Status GetDirectoryFiles( - const std::string& path, std::set* files) override; - Status ReadTextFile(const std::string& path, std::string* contents) override; - Status LocalizePath( - const std::string& path, - std::shared_ptr* localized) override; - Status WriteTextFile( - const std::string& path, const std::string& contents) override; - Status WriteBinaryFile( - const std::string& path, const char* contents, - const size_t content_len) override; - Status MakeDirectory(const std::string& dir, const bool recursive) override; - Status MakeTemporaryDirectory(std::string* temp_dir) override; - Status DeletePath(const std::string& path) override; -}; - -Status -LocalFileSystem::FileExists(const std::string& path, bool* exists) -{ - *exists = (access(path.c_str(), F_OK) == 0); - return Status::Success; -} - -Status -LocalFileSystem::IsDirectory(const std::string& path, bool* is_dir) -{ - return IsPathDirectory(path, is_dir); -} - -Status -LocalFileSystem::FileModificationTime( - const std::string& path, int64_t* mtime_ns) -{ - struct stat st; - if (stat(path.c_str(), &st) != 0) { - return Status(Status::Code::INTERNAL, "failed to stat file " + path); - } - -#ifdef _WIN32 - // In Windows, st_mtime is in time_t - *mtime_ns = std::max(st.st_mtime, st.st_ctime); -#else - *mtime_ns = - std::max(TIMESPEC_TO_NANOS(st.st_mtim), TIMESPEC_TO_NANOS(st.st_ctim)); -#endif - return Status::Success; -} - -Status -LocalFileSystem::GetDirectoryContents( - const std::string& path, std::set* contents) -{ -#ifdef _WIN32 - WIN32_FIND_DATA entry; - // Append "*" to obtain all files under 'path' - HANDLE dir = FindFirstFile(JoinPath({path, "*"}).c_str(), &entry); - if (dir == INVALID_HANDLE_VALUE) { - return Status(Status::Code::INTERNAL, "failed to open directory " + path); - } - if ((strcmp(entry.cFileName, ".") != 0) && - (strcmp(entry.cFileName, "..") != 
0)) { - contents->insert(entry.cFileName); - } - while (FindNextFile(dir, &entry)) { - if ((strcmp(entry.cFileName, ".") != 0) && - (strcmp(entry.cFileName, "..") != 0)) { - contents->insert(entry.cFileName); - } - } - - FindClose(dir); -#else - DIR* dir = opendir(path.c_str()); - if (dir == nullptr) { - return Status(Status::Code::INTERNAL, "failed to open directory " + path); - } - - struct dirent* entry; - while ((entry = readdir(dir)) != nullptr) { - std::string entryname = entry->d_name; - if ((entryname != ".") && (entryname != "..")) { - contents->insert(entryname); - } - } - - closedir(dir); -#endif - return Status::Success; -} - -Status -LocalFileSystem::GetDirectorySubdirs( - const std::string& path, std::set* subdirs) -{ - RETURN_IF_ERROR(GetDirectoryContents(path, subdirs)); - - // Erase non-directory entries... - for (auto iter = subdirs->begin(); iter != subdirs->end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir)); - if (!is_dir) { - iter = subdirs->erase(iter); - } else { - ++iter; - } - } - - return Status::Success; -} - -Status -LocalFileSystem::GetDirectoryFiles( - const std::string& path, std::set* files) -{ - RETURN_IF_ERROR(GetDirectoryContents(path, files)); - - // Erase directory entries... - for (auto iter = files->begin(); iter != files->end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir)); - if (is_dir) { - iter = files->erase(iter); - } else { - ++iter; - } - } - - return Status::Success; -} - -Status -LocalFileSystem::ReadTextFile(const std::string& path, std::string* contents) -{ - std::ifstream in(path, std::ios::in | std::ios::binary); - if (!in) { - return Status( - Status::Code::INTERNAL, - "failed to open text file for read " + path + ": " + strerror(errno)); - } - - in.seekg(0, std::ios::end); - contents->resize(in.tellg()); - in.seekg(0, std::ios::beg); - in.read(&(*contents)[0], contents->size()); - in.close(); - - return Status::Success; -} - -Status -LocalFileSystem::LocalizePath( - const std::string& path, std::shared_ptr* localized) -{ - // For local file system we don't actually need to download the - // directory or file. We use it in place. 
- localized->reset(new LocalizedPath(path)); - return Status::Success; -} - -Status -LocalFileSystem::WriteTextFile( - const std::string& path, const std::string& contents) -{ - std::ofstream out(path, std::ios::out | std::ios::binary); - if (!out) { - return Status( - Status::Code::INTERNAL, - "failed to open text file for write " + path + ": " + strerror(errno)); - } - - out.write(&contents[0], contents.size()); - out.close(); - - return Status::Success; -} - -Status -LocalFileSystem::WriteBinaryFile( - const std::string& path, const char* contents, const size_t content_len) -{ - std::ofstream out(path, std::ios::out | std::ios::binary); - if (!out) { - return Status( - Status::Code::INTERNAL, "failed to open binary file for write " + path + - ": " + strerror(errno)); - } - - out.write(contents, content_len); - - return Status::Success; -} - -Status -LocalFileSystem::MakeDirectory(const std::string& dir, const bool recursive) -{ -#ifdef _WIN32 - if (mkdir(dir.c_str()) == -1) -#else - if (mkdir(dir.c_str(), S_IRWXU) == -1) -#endif - { - // Only allow the error due to parent directory does not exist - // if 'recursive' is requested - if ((errno == ENOENT) && (!dir.empty()) && recursive) { - RETURN_IF_ERROR(MakeDirectory(DirName(dir), recursive)); - // Retry the creation -#ifdef _WIN32 - if (mkdir(dir.c_str()) == -1) -#else - if (mkdir(dir.c_str(), S_IRWXU) == -1) -#endif - { - return Status( - Status::Code::INTERNAL, "Failed to create directory '" + dir + - "', errno:" + strerror(errno)); - } - } else { - return Status( - Status::Code::INTERNAL, - "Failed to create directory '" + dir + "', errno:" + strerror(errno)); - } - } - - return Status::Success; -} - -Status -LocalFileSystem::MakeTemporaryDirectory(std::string* temp_dir) -{ -#ifdef _WIN32 - char temp_path[MAX_PATH + 1]; - size_t temp_path_length = GetTempPath(MAX_PATH + 1, temp_path); - if (temp_path_length == 0) { - return Status( - Status::Code::INTERNAL, - "Failed to get local directory for temporary files"); - } - // There is no single operation like 'mkdtemp' in Windows, thus generating - // unique temporary directory is a process of getting temporary file name, - // deleting the file (file creation is side effect fo getting name), creating - // corresponding directory, so mutex is used to avoid possible race condition. - // However, it doesn't prevent other process on creating temporary file and - // thus the race condition may still happen. One possible solution is - // to reserve a temporary directory for the process and generate temporary - // model directories inside it. - static std::mutex mtx; - std::lock_guard lk(mtx); - // Construct a std::string as filled 'temp_path' is not C string, - // and so that we can reuse 'temp_path' to hold the temp file name. 
- std::string temp_path_str(temp_path, temp_path_length); - if (GetTempFileName(temp_path_str.c_str(), "folder", 0, temp_path) == 0) { - return Status(Status::Code::INTERNAL, "Failed to create local temp folder"); - } - *temp_dir = temp_path; - DeleteFile(temp_dir->c_str()); - if (CreateDirectory(temp_dir->c_str(), NULL) == 0) { - return Status( - Status::Code::INTERNAL, - "Failed to create local temp folder: " + *temp_dir); - } -#else - std::string folder_template = "/tmp/folderXXXXXX"; - char* res = mkdtemp(const_cast(folder_template.c_str())); - if (res == nullptr) { - return Status( - Status::Code::INTERNAL, - "Failed to create local temp folder: " + folder_template + - ", errno:" + strerror(errno)); - } - *temp_dir = res; -#endif - return Status::Success; -} - -Status -LocalFileSystem::DeletePath(const std::string& path) -{ - bool is_dir = false; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (is_dir) { - std::set contents; - RETURN_IF_ERROR(GetDirectoryContents(path, &contents)); - for (const auto& content : contents) { - RETURN_IF_ERROR(DeletePath(JoinPath({path, content}))); - } - rmdir(path.c_str()); - } else { - remove(path.c_str()); - } - return Status::Success; -} - -#if defined(TRITON_ENABLE_GCS) || defined(TRITON_ENABLE_S3) || \ - defined(TRITON_ENABLE_AZURE_STORAGE) -// Helper function to take care of lack of trailing slashes -std::string -AppendSlash(const std::string& name) -{ - if (name.empty() || (name.back() == '/')) { - return name; - } - - return (name + "/"); -} -#endif // TRITON_ENABLE_GCS || TRITON_ENABLE_S3 || TRITON_ENABLE_AZURE_STORAGE - -#ifdef TRITON_ENABLE_GCS - -namespace gcs = google::cloud::storage; - -struct GCSCredential { - std::string path_; - - GCSCredential(); // from env var - GCSCredential(triton::common::TritonJson::Value& cred_json); -}; - -GCSCredential::GCSCredential() -{ - const char* path = std::getenv("GOOGLE_APPLICATION_CREDENTIALS"); - path_ = (path != nullptr ? 
std::string(path) : ""); -} - -GCSCredential::GCSCredential(triton::common::TritonJson::Value& cred_json) -{ - cred_json.AsString(&path_); -} - -class GCSFileSystem : public FileSystem { - public: - GCSFileSystem(const GCSCredential& gs_cred); - // unify with S3/azure interface - GCSFileSystem(const std::string& path, const GCSCredential& gs_cred) - : GCSFileSystem(gs_cred) - { - } - Status CheckClient(); - // unify with S3 interface - Status CheckClient(const std::string& path) { return CheckClient(); } - - Status FileExists(const std::string& path, bool* exists) override; - Status IsDirectory(const std::string& path, bool* is_dir) override; - Status FileModificationTime( - const std::string& path, int64_t* mtime_ns) override; - Status GetDirectoryContents( - const std::string& path, std::set* contents) override; - Status GetDirectorySubdirs( - const std::string& path, std::set* subdirs) override; - Status GetDirectoryFiles( - const std::string& path, std::set* files) override; - Status ReadTextFile(const std::string& path, std::string* contents) override; - Status LocalizePath( - const std::string& path, - std::shared_ptr* localized) override; - Status WriteTextFile( - const std::string& path, const std::string& contents) override; - Status WriteBinaryFile( - const std::string& path, const char* contents, - const size_t content_len) override; - Status MakeDirectory(const std::string& dir, const bool recursive) override; - Status MakeTemporaryDirectory(std::string* temp_dir) override; - Status DeletePath(const std::string& path) override; - - private: - Status ParsePath( - const std::string& path, std::string* bucket, std::string* object); - Status MetaDataExists( - const std::string path, bool* exists, - google::cloud::StatusOr* metadata); - - google::cloud::StatusOr client_; -}; - -GCSFileSystem::GCSFileSystem(const GCSCredential& gs_cred) -{ - auto creds = gcs::oauth2::CreateServiceAccountCredentialsFromJsonFilePath( - gs_cred.path_); - if (creds) { - client_ = gcs::Client(gcs::ClientOptions(*creds)); - } -} - -Status -GCSFileSystem::CheckClient() -{ - if (!client_) { - return Status( - Status::Code::INTERNAL, - "Unable to create GCS client. Check account credentials."); - } - return Status::Success; -} - -Status -GCSFileSystem::ParsePath( - const std::string& path, std::string* bucket, std::string* object) -{ - // Get the bucket name and the object path. 
Return error if input is malformed - int bucket_start = path.find("gs://") + strlen("gs://"); - int bucket_end = path.find("/", bucket_start); - - // If there isn't a second slash, the address has only the bucket - if (bucket_end > bucket_start) { - *bucket = path.substr(bucket_start, bucket_end - bucket_start); - *object = path.substr(bucket_end + 1); - } else { - *bucket = path.substr(bucket_start); - *object = ""; - } - - if (bucket->empty()) { - return Status( - Status::Code::INTERNAL, "No bucket name found in path: " + path); - } - - return Status::Success; -} - -Status -GCSFileSystem::FileExists(const std::string& path, bool* exists) -{ - *exists = false; - - std::string bucket, object; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object)); - - // Make a request for metadata and check the response - google::cloud::StatusOr object_metadata = - client_->GetObjectMetadata(bucket, object); - - if (object_metadata) { - *exists = true; - return Status::Success; - } - - // GCS doesn't make objects for directories, so it could still be a directory - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - *exists = is_dir; - - return Status::Success; -} - -Status -GCSFileSystem::IsDirectory(const std::string& path, bool* is_dir) -{ - *is_dir = false; - std::string bucket, object_path; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object_path)); - - // Check if the bucket exists - google::cloud::StatusOr bucket_metadata = - client_->GetBucketMetadata(bucket); - - if (!bucket_metadata) { - return Status( - Status::Code::INTERNAL, "Could not get MetaData for bucket with name " + - bucket + " : " + - bucket_metadata.status().message()); - } - - // Root case - bucket exists and object path is empty - if (object_path.empty()) { - *is_dir = true; - return Status::Success; - } - - // Check whether it has children. 
If at least one child, it is a directory - for (auto&& object_metadata : - client_->ListObjects(bucket, gcs::Prefix(AppendSlash(object_path)))) { - if (object_metadata) { - *is_dir = true; - break; - } - } - return Status::Success; -} - -Status -GCSFileSystem::FileModificationTime(const std::string& path, int64_t* mtime_ns) -{ - // We don't need to worry about the case when this is a directory - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (is_dir) { - *mtime_ns = 0; - return Status::Success; - } - - std::string bucket, object; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object)); - - // Otherwise check the object metadata for update time - google::cloud::StatusOr object_metadata = - client_->GetObjectMetadata(bucket, object); - - if (!object_metadata) { - return Status( - Status::Code::INTERNAL, "Failed to get metadata for " + object + " : " + - object_metadata.status().message()); - } - - // Get duration from time point with respect to object clock - auto update_time = std::chrono::time_point_cast( - object_metadata->updated()) - .time_since_epoch() - .count(); - - *mtime_ns = update_time; - return Status::Success; -} - -Status -GCSFileSystem::GetDirectoryContents( - const std::string& path, std::set* contents) -{ - std::string bucket, dir_path; - RETURN_IF_ERROR(ParsePath(path, &bucket, &dir_path)); - // Append a slash to make it easier to list contents - std::string full_dir = AppendSlash(dir_path); - - // Get objects with prefix equal to full directory path - for (auto&& object_metadata : - client_->ListObjects(bucket, gcs::Prefix(full_dir))) { - if (!object_metadata) { - return Status( - Status::Code::INTERNAL, "Could not list contents of directory at " + - path + " : " + - object_metadata.status().message()); - } - - // In the case of empty directories, the directory itself will appear here - if (object_metadata->name() == full_dir) { - continue; - } - - // We have to make sure that subdirectory contents do not appear here - std::string name = object_metadata->name(); - int item_start = name.find(full_dir) + full_dir.size(); - // GCS response prepends parent directory name - int item_end = name.find("/", item_start); - - // Let set take care of subdirectory contents - std::string item = name.substr(item_start, item_end - item_start); - contents->insert(item); - } - return Status::Success; -} - -Status -GCSFileSystem::GetDirectorySubdirs( - const std::string& path, std::set* subdirs) -{ - RETURN_IF_ERROR(GetDirectoryContents(path, subdirs)); - - // Erase non-directory entries... - for (auto iter = subdirs->begin(); iter != subdirs->end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir)); - if (!is_dir) { - iter = subdirs->erase(iter); - } else { - ++iter; - } - } - - return Status::Success; -} - -Status -GCSFileSystem::GetDirectoryFiles( - const std::string& path, std::set* files) -{ - RETURN_IF_ERROR(GetDirectoryContents(path, files)); - - // Erase directory entries... 
- for (auto iter = files->begin(); iter != files->end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({path, *iter}), &is_dir)); - if (is_dir) { - iter = files->erase(iter); - } else { - ++iter; - } - } - - return Status::Success; -} - -Status -GCSFileSystem::ReadTextFile(const std::string& path, std::string* contents) -{ - bool exists; - RETURN_IF_ERROR(FileExists(path, &exists)); - - if (!exists) { - return Status(Status::Code::INTERNAL, "File does not exist at " + path); - } - - std::string bucket, object; - ParsePath(path, &bucket, &object); - - gcs::ObjectReadStream stream = client_->ReadObject(bucket, object); - - if (!stream) { - return Status( - Status::Code::INTERNAL, "Failed to open object read stream for " + - path + " : " + stream.status().message()); - } - - std::string data = ""; - char c; - while (stream.get(c)) { - data += c; - } - - *contents = data; - - return Status::Success; -} - -Status -GCSFileSystem::LocalizePath( - const std::string& path, std::shared_ptr* localized) -{ - bool exists; - RETURN_IF_ERROR(FileExists(path, &exists)); - if (!exists) { - return Status( - Status::Code::INTERNAL, "directory or file does not exist at " + path); - } - - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (!is_dir) { - return Status( - Status::Code::UNSUPPORTED, - "GCS file localization not yet implemented " + path); - } - - std::string tmp_folder; - RETURN_IF_ERROR( - triton::core::MakeTemporaryDirectory(FileSystemType::LOCAL, &tmp_folder)); - - localized->reset(new LocalizedPath(path, tmp_folder)); - - std::set contents, filenames; - RETURN_IF_ERROR(GetDirectoryContents(path, &filenames)); - for (auto itr = filenames.begin(); itr != filenames.end(); ++itr) { - contents.insert(JoinPath({path, *itr})); - } - - while (contents.size() != 0) { - std::set tmp_contents = contents; - contents.clear(); - for (auto iter = tmp_contents.begin(); iter != tmp_contents.end(); ++iter) { - bool is_subdir; - std::string gcs_fpath = *iter; - std::string gcs_removed_path = gcs_fpath.substr(path.size()); - std::string local_fpath = - JoinPath({(*localized)->Path(), gcs_removed_path}); - RETURN_IF_ERROR(IsDirectory(gcs_fpath, &is_subdir)); - if (is_subdir) { - // Create local mirror of sub-directories -#ifdef _WIN32 - int status = mkdir(const_cast(local_fpath.c_str())); -#else - int status = mkdir( - const_cast(local_fpath.c_str()), - S_IRUSR | S_IWUSR | S_IXUSR); -#endif - if (status == -1) { - return Status( - Status::Code::INTERNAL, - "Failed to create local folder: " + local_fpath + - ", errno:" + strerror(errno)); - } - - // Add sub-directories and deeper files to contents - std::set subdir_contents; - RETURN_IF_ERROR(GetDirectoryContents(gcs_fpath, &subdir_contents)); - for (auto itr = subdir_contents.begin(); itr != subdir_contents.end(); - ++itr) { - contents.insert(JoinPath({gcs_fpath, *itr})); - } - } else { - // Create local copy of file - std::string file_bucket, file_object; - RETURN_IF_ERROR(ParsePath(gcs_fpath, &file_bucket, &file_object)); - - // Send a request to read the object - gcs::ObjectReadStream filestream = - client_->ReadObject(file_bucket, file_object); - if (!filestream) { - return Status( - Status::Code::INTERNAL, "Failed to get object at " + *iter + - " : " + - filestream.status().message()); - } - - std::string gcs_removed_path = (*iter).substr(path.size()); - std::string local_file_path = - JoinPath({(*localized)->Path(), gcs_removed_path}); - std::ofstream output_file(local_file_path.c_str(), std::ios::binary); - output_file << 
filestream.rdbuf(); - output_file.close(); - } - } - } - - return Status::Success; -} - -Status -GCSFileSystem::WriteTextFile( - const std::string& path, const std::string& contents) -{ - return Status( - Status::Code::UNSUPPORTED, - "Write text file operation not yet implemented " + path); -} - -Status -GCSFileSystem::WriteBinaryFile( - const std::string& path, const char* contents, const size_t content_len) -{ - return Status( - Status::Code::UNSUPPORTED, - "Write text file operation not yet implemented " + path); -} - -Status -GCSFileSystem::MakeDirectory(const std::string& dir, const bool recursive) -{ - return Status( - Status::Code::UNSUPPORTED, - "Make temporary directory operation not yet implemented"); -} - -Status -GCSFileSystem::MakeTemporaryDirectory(std::string* temp_dir) -{ - return Status( - Status::Code::UNSUPPORTED, - "Make temporary directory operation not yet implemented"); -} - -Status -GCSFileSystem::DeletePath(const std::string& path) -{ - return Status( - Status::Code::UNSUPPORTED, "Delete path operation not yet implemented"); -} - -#endif // TRITON_ENABLE_GCS - - -#ifdef TRITON_ENABLE_AZURE_STORAGE - -namespace as = azure::storage_lite; -const std::string AS_URL_PATTERN = "as://([^/]+)/([^/?]+)(?:/([^?]*))?(\\?.*)?"; - -struct ASCredential { - std::string account_str_; - std::string account_key_; - - ASCredential(); // from env var - ASCredential(triton::common::TritonJson::Value& cred_json); -}; - -ASCredential::ASCredential() -{ - const auto to_str = [](const char* s) -> std::string { - return (s != nullptr ? std::string(s) : ""); - }; - const char* account_str = std::getenv("AZURE_STORAGE_ACCOUNT"); - const char* account_key = std::getenv("AZURE_STORAGE_KEY"); - account_str_ = to_str(account_str); - account_key_ = to_str(account_key); -} - -ASCredential::ASCredential(triton::common::TritonJson::Value& cred_json) -{ - triton::common::TritonJson::Value account_str_json, account_key_json; - if (cred_json.Find("account_str", &account_str_json)) - account_str_json.AsString(&account_str_); - if (cred_json.Find("account_key", &account_key_json)) - account_key_json.AsString(&account_key_); -} - -class ASFileSystem : public FileSystem { - public: - ASFileSystem(const std::string& path, const ASCredential& as_cred); - Status CheckClient(); - // unify with S3 interface - Status CheckClient(const std::string& path) { return CheckClient(); } - - Status FileExists(const std::string& path, bool* exists) override; - Status IsDirectory(const std::string& path, bool* is_dir) override; - Status FileModificationTime( - const std::string& path, int64_t* mtime_ns) override; - Status GetDirectoryContents( - const std::string& path, std::set* contents) override; - Status GetDirectorySubdirs( - const std::string& path, std::set* subdirs) override; - Status GetDirectoryFiles( - const std::string& path, std::set* files) override; - Status ReadTextFile(const std::string& path, std::string* contents) override; - Status LocalizePath( - const std::string& path, - std::shared_ptr* localized) override; - Status WriteTextFile( - const std::string& path, const std::string& contents) override; - Status WriteBinaryFile( - const std::string& path, const char* contents, - const size_t content_len) override; - Status MakeDirectory(const std::string& dir, const bool recursive) override; - Status MakeTemporaryDirectory(std::string* temp_dir) override; - Status DeletePath(const std::string& path) override; - - private: - Status ParsePath( - const std::string& path, std::string* bucket, std::string* 
object); - std::shared_ptr client_; - - Status ListDirectory( - const std::string& path, const std::string& dir_path, - std::function< - Status(const as::list_blobs_segmented_item&, const std::string&)> - func); - - Status DownloadFolder( - const std::string& container, const std::string& path, - const std::string& dest); - re2::RE2 as_regex_; -}; - -Status -ASFileSystem::ParsePath( - const std::string& path, std::string* container, std::string* object) -{ - std::string host_name, query; - if (!RE2::FullMatch(path, as_regex_, &host_name, container, object, &query)) { - return Status( - Status::Code::INTERNAL, "Invalid azure storage path: " + path); - } - return Status::Success; -} - -ASFileSystem::ASFileSystem(const std::string& path, const ASCredential& as_cred) - : as_regex_(AS_URL_PATTERN) -{ - std::shared_ptr account = nullptr; - std::string host_name, container, blob_path, query; - if (RE2::FullMatch( - path, as_regex_, &host_name, &container, &blob_path, &query)) { - size_t pos = host_name.rfind(".blob.core.windows.net"); - std::string account_name; - if (as_cred.account_str_.empty()) { - if (pos != std::string::npos) { - account_name = host_name.substr(0, pos); - } else { - account_name = host_name; - } - } else { - account_name = as_cred.account_str_; - } - - std::shared_ptr cred; - if (!as_cred.account_key_.empty()) { - // Shared Key - cred = std::make_shared( - account_name, as_cred.account_key_); - } else { - cred = std::make_shared(); - } - account = std::make_shared( - account_name, cred, /* use_https */ true); - client_ = - std::make_shared(account, /*max_concurrency*/ 16); - } -} - -Status -ASFileSystem::CheckClient() -{ - if (client_ == nullptr) { - return Status( - Status::Code::INTERNAL, - "Unable to create Azure filesystem client. 
Check account credentials."); - } - return Status::Success; -} - - -Status -ASFileSystem::FileModificationTime(const std::string& path, int64_t* mtime_ns) -{ - as::blob_client_wrapper bc(client_); - std::string container, object_path; - RETURN_IF_ERROR(ParsePath(path, &container, &object_path)); - - auto blobProperty = bc.get_blob_property(container, object_path); - if (errno != 0) { - return Status( - Status::Code::INTERNAL, "Unable to get blob property for file at " + - path + ", errno:" + strerror(errno)); - } - - auto time = - std::chrono::system_clock::from_time_t(blobProperty.last_modified); - auto update_time = - std::chrono::time_point_cast(time) - .time_since_epoch() - .count(); - - *mtime_ns = update_time; - return Status::Success; -}; - -Status -ASFileSystem::ListDirectory( - const std::string& container, const std::string& dir_path, - std::function< - Status(const as::list_blobs_segmented_item&, const std::string&)> - func) -{ - as::blob_client_wrapper bc(client_); - - // Append a slash to make it easier to list contents - std::string full_dir = AppendSlash(dir_path); - auto blobs = bc.list_blobs_segmented(container, "/", "", full_dir); - if (errno != 0) { - return Status( - Status::Code::INTERNAL, "Failed to get contents of directory " + - dir_path + ", errno:" + strerror(errno)); - } - - for (auto&& item : blobs.blobs) { - std::string name = item.name; - int item_start = name.find(full_dir) + full_dir.size(); - int item_end = name.find("/", item_start); - // Let set take care of subdirectory contents - std::string subfile = name.substr(item_start, item_end - item_start); - auto status = func(item, subfile); - if (!status.IsOk()) { - return status; - } - } - return Status::Success; -} - -Status -ASFileSystem::GetDirectoryContents( - const std::string& path, std::set* contents) -{ - auto func = [&](const as::list_blobs_segmented_item& item, - const std::string& dir) { - contents->insert(dir); - return Status::Success; - }; - std::string container, dir_path; - RETURN_IF_ERROR(ParsePath(path, &container, &dir_path)); - return ListDirectory(container, dir_path, func); -} - -Status -ASFileSystem::GetDirectorySubdirs( - const std::string& path, std::set* subdirs) -{ - auto func = [&](const as::list_blobs_segmented_item& item, - const std::string& dir) { - if (item.is_directory) { - subdirs->insert(dir); - } - return Status::Success; - }; - std::string container, dir_path; - RETURN_IF_ERROR(ParsePath(path, &container, &dir_path)); - return ListDirectory(container, dir_path, func); -} - -Status -ASFileSystem::GetDirectoryFiles( - const std::string& path, std::set* files) -{ - auto func = [&](const as::list_blobs_segmented_item& item, - const std::string& file) { - if (!item.is_directory) { - files->insert(file); - } - return Status::Success; - }; - std::string container, dir_path; - RETURN_IF_ERROR(ParsePath(path, &container, &dir_path)); - return ListDirectory(container, dir_path, func); -} - -Status -ASFileSystem::IsDirectory(const std::string& path, bool* is_dir) -{ - *is_dir = false; - std::string container, object_path; - RETURN_IF_ERROR(ParsePath(path, &container, &object_path)); - - as::blob_client_wrapper bc(client_); - auto blobs = bc.list_blobs_segmented(container, "/", "", object_path, 1); - if (errno != 0) { - return Status( - Status::Code::INTERNAL, "Failed to check if directory at " + path + - ", errno:" + strerror(errno)); - } - *is_dir = blobs.blobs.size() > 0; - - return Status::Success; -}; - -Status -ASFileSystem::ReadTextFile(const std::string& path, std::string* 
contents) -{ - as::blob_client_wrapper bc(client_); - std::string container, object_path; - RETURN_IF_ERROR(ParsePath(path, &container, &object_path)); - using namespace azure::storage_lite; - std::ostringstream out_stream; - bc.download_blob_to_stream(container, object_path, 0, 0, out_stream); - if (errno != 0) { - return Status( - Status::Code::INTERNAL, "Failed to fetch file stream at " + path + - ", errno:" + strerror(errno)); - } - *contents = out_stream.str(); - - return Status::Success; -} - -Status -ASFileSystem::FileExists(const std::string& path, bool* exists) -{ - *exists = false; - - std::string container, object; - RETURN_IF_ERROR(ParsePath(path, &container, &object)); - as::blob_client_wrapper bc(client_); - auto blobs = bc.list_blobs_segmented(container, "/", "", object, 1); - if (errno != 0) { - return Status( - Status::Code::INTERNAL, "Failed to check if file exists at " + path + - ", errno:" + strerror(errno)); - } - if (blobs.blobs.size() > 0) { - *exists = true; - } - return Status::Success; -} - -Status -ASFileSystem::DownloadFolder( - const std::string& container, const std::string& path, - const std::string& dest) -{ - as::blob_client_wrapper bc(client_); - auto func = [&](const as::list_blobs_segmented_item& item, - const std::string& dir) { - auto local_path = JoinPath({dest, dir}); - auto blob_path = JoinPath({path, dir}); - if (item.is_directory) { - int status = mkdir( - const_cast(local_path.c_str()), S_IRUSR | S_IWUSR | S_IXUSR); - if (status == -1) { - return Status( - Status::Code::INTERNAL, - "Failed to create local folder: " + local_path + - ", errno:" + strerror(errno)); - } - auto ret = DownloadFolder(container, blob_path, local_path); - if (!ret.IsOk()) { - return ret; - } - } else { - time_t last_modified; - bc.download_blob_to_file(container, blob_path, local_path, last_modified); - if (errno != 0) { - return Status( - Status::Code::INTERNAL, "Failed to download file at " + blob_path + - ", errno:" + strerror(errno)); - } - } - return Status::Success; - }; - return ListDirectory(container, path, func); -} - -Status -ASFileSystem::LocalizePath( - const std::string& path, std::shared_ptr* localized) -{ - bool exists; - RETURN_IF_ERROR(FileExists(path, &exists)); - if (!exists) { - return Status( - Status::Code::INTERNAL, "directory or file does not exist at " + path); - } - - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (!is_dir) { - return Status( - Status::Code::UNSUPPORTED, - "AS file localization not yet implemented " + path); - } - - std::string folder_template = "/tmp/folderXXXXXX"; - char* tmp_folder = mkdtemp(const_cast(folder_template.c_str())); - if (tmp_folder == nullptr) { - return Status( - Status::Code::INTERNAL, - "Failed to create local temp folder: " + folder_template + - ", errno:" + strerror(errno)); - } - localized->reset(new LocalizedPath(path, tmp_folder)); - - std::string dest(folder_template); - - as::blob_client_wrapper bc(client_); - - std::string container, object; - RETURN_IF_ERROR(ParsePath(path, &container, &object)); - return DownloadFolder(container, object, dest); -} - -Status -ASFileSystem::WriteTextFile( - const std::string& path, const std::string& contents) -{ - std::stringstream ss(contents); - std::istream is(ss.rdbuf()); - std::string container, object; - RETURN_IF_ERROR(ParsePath(path, &container, &object)); - std::vector> metadata; - auto ret = - client_->upload_block_blob_from_stream(container, object, is, metadata) - .get(); - if (!ret.success()) { - return Status( - Status::Code::INTERNAL, 
- "Failed to upload blob, Error: " + ret.error().code + ", " + - ret.error().code_name); - } - return Status::Success; -} - -Status -ASFileSystem::WriteBinaryFile( - const std::string& path, const char* contents, const size_t content_len) -{ - return Status( - Status::Code::UNSUPPORTED, - "Write text file operation not yet implemented " + path); -} - -Status -ASFileSystem::MakeDirectory(const std::string& dir, const bool recursive) -{ - return Status( - Status::Code::UNSUPPORTED, - "Make directory operation not yet implemented"); -} - -Status -ASFileSystem::MakeTemporaryDirectory(std::string* temp_dir) -{ - return Status( - Status::Code::UNSUPPORTED, - "Make temporary directory operation not yet implemented"); -} - -Status -ASFileSystem::DeletePath(const std::string& path) -{ - return Status( - Status::Code::UNSUPPORTED, "Delete path operation not yet implemented"); -} - -#endif // TRITON_ENABLE_AZURE_STORAGE - - -#ifdef TRITON_ENABLE_S3 - -namespace s3 = Aws::S3; - -struct S3Credential { - std::string secret_key_; - std::string key_id_; - std::string region_; - std::string session_token_; - std::string profile_name_; - - S3Credential(); // from env var - S3Credential(triton::common::TritonJson::Value& cred_json); -}; - -S3Credential::S3Credential() -{ - const auto to_str = [](const char* s) -> std::string { - return (s != nullptr ? std::string(s) : ""); - }; - const char* secret_key = std::getenv("AWS_SECRET_ACCESS_KEY"); - const char* key_id = std::getenv("AWS_ACCESS_KEY_ID"); - const char* region = std::getenv("AWS_DEFAULT_REGION"); - const char* session_token = std::getenv("AWS_SESSION_TOKEN"); - const char* profile = std::getenv("AWS_PROFILE"); - secret_key_ = to_str(secret_key); - key_id_ = to_str(key_id); - region_ = to_str(region); - session_token_ = to_str(session_token); - profile_name_ = to_str(profile); -} - -S3Credential::S3Credential(triton::common::TritonJson::Value& cred_json) -{ - triton::common::TritonJson::Value secret_key_json, key_id_json, region_json, - session_token_json, profile_json; - if (cred_json.Find("secret_key", &secret_key_json)) - secret_key_json.AsString(&secret_key_); - if (cred_json.Find("key_id", &key_id_json)) - key_id_json.AsString(&key_id_); - if (cred_json.Find("region", ®ion_json)) - region_json.AsString(®ion_); - if (cred_json.Find("session_token", &session_token_json)) - session_token_json.AsString(&session_token_); - if (cred_json.Find("profile", &profile_json)) - profile_json.AsString(&profile_name_); -} - -class S3FileSystem : public FileSystem { - public: - S3FileSystem(const std::string& s3_path, const S3Credential& s3_cred); - Status CheckClient(const std::string& s3_path); - - Status FileExists(const std::string& path, bool* exists) override; - Status IsDirectory(const std::string& path, bool* is_dir) override; - Status FileModificationTime( - const std::string& path, int64_t* mtime_ns) override; - Status GetDirectoryContents( - const std::string& path, std::set* contents) override; - Status GetDirectorySubdirs( - const std::string& path, std::set* subdirs) override; - Status GetDirectoryFiles( - const std::string& path, std::set* files) override; - Status ReadTextFile(const std::string& path, std::string* contents) override; - Status LocalizePath( - const std::string& path, - std::shared_ptr* localized) override; - Status WriteTextFile( - const std::string& path, const std::string& contents) override; - Status WriteBinaryFile( - const std::string& path, const char* contents, - const size_t content_len) override; - Status 
MakeDirectory(const std::string& dir, const bool recursive) override; - Status MakeTemporaryDirectory(std::string* temp_dir) override; - Status DeletePath(const std::string& path) override; - - private: - Status ParsePath( - const std::string& path, std::string* bucket, std::string* object); - Status CleanPath(const std::string& s3_path, std::string* clean_path); - std::unique_ptr client_; // init after Aws::InitAPI is called - re2::RE2 s3_regex_; -}; - -Status -S3FileSystem::ParsePath( - const std::string& path, std::string* bucket, std::string* object) -{ - // Cleanup extra slashes - std::string clean_path; - RETURN_IF_ERROR(CleanPath(path, &clean_path)); - - // Get the bucket name and the object path. Return error if path is malformed - std::string protocol, host_name, host_port; - if (!RE2::FullMatch( - clean_path, s3_regex_, &protocol, &host_name, &host_port, bucket, - object)) { - int bucket_start = clean_path.find("s3://") + strlen("s3://"); - int bucket_end = clean_path.find("/", bucket_start); - - // If there isn't a slash, the address has only the bucket - if (bucket_end > bucket_start) { - *bucket = clean_path.substr(bucket_start, bucket_end - bucket_start); - *object = clean_path.substr(bucket_end + 1); - } else { - *bucket = clean_path.substr(bucket_start); - *object = ""; - } - } else { - // Erase leading '/' that is left behind in object name - if ((*object)[0] == '/') { - object->erase(0, 1); - } - } - - if (bucket->empty()) { - return Status( - Status::Code::INTERNAL, "No bucket name found in path: " + path); - } - - return Status::Success; -} - -Status -S3FileSystem::CleanPath(const std::string& s3_path, std::string* clean_path) -{ - // Must handle paths with s3 prefix - size_t start = s3_path.find("s3://"); - std::string path = ""; - if (start != std::string::npos) { - path = s3_path.substr(start + strlen("s3://")); - *clean_path = "s3://"; - } else { - path = s3_path; - *clean_path = ""; - } - - // Must handle paths with https:// or http:// prefix - size_t https_start = path.find("https://"); - if (https_start != std::string::npos) { - path = path.substr(https_start + strlen("https://")); - *clean_path += "https://"; - } else { - size_t http_start = path.find("http://"); - if (http_start != std::string::npos) { - path = path.substr(http_start + strlen("http://")); - *clean_path += "http://"; - } - } - - // Remove trailing slashes - size_t rtrim_length = path.find_last_not_of('/'); - if (rtrim_length == std::string::npos) { - return Status( - Status::Code::INVALID_ARG, "Invalid bucket name: '" + path + "'"); - } - - // Remove leading slashes - size_t ltrim_length = path.find_first_not_of('/'); - if (ltrim_length == std::string::npos) { - return Status( - Status::Code::INVALID_ARG, "Invalid bucket name: '" + path + "'"); - } - - // Remove extra internal slashes - std::string true_path = path.substr(ltrim_length, rtrim_length + 1); - std::vector slash_locations; - bool previous_slash = false; - for (size_t i = 0; i < true_path.size(); i++) { - if (true_path[i] == '/') { - if (!previous_slash) { - *clean_path += true_path[i]; - } - previous_slash = true; - } else { - *clean_path += true_path[i]; - previous_slash = false; - } - } - - return Status::Success; -} - -S3FileSystem::S3FileSystem( - const std::string& s3_path, const S3Credential& s3_cred) - : s3_regex_( - "s3://(http://|https://|)([0-9a-zA-Z\\-.]+):([0-9]+)/" - "([0-9a-z.\\-]+)(((/[0-9a-zA-Z.\\-_]+)*)?)") -{ - // init aws api if not already - Aws::SDKOptions options; - static std::once_flag onceFlag; - 
std::call_once(onceFlag, [&options] { Aws::InitAPI(options); }); - - Aws::Client::ClientConfiguration config; - Aws::Auth::AWSCredentials credentials; - - // check vars for S3 credentials -> aws profile -> default - if (!s3_cred.secret_key_.empty() && !s3_cred.key_id_.empty()) { - credentials.SetAWSAccessKeyId(s3_cred.key_id_.c_str()); - credentials.SetAWSSecretKey(s3_cred.secret_key_.c_str()); - if (!s3_cred.session_token_.empty()) { - credentials.SetSessionToken(s3_cred.session_token_.c_str()); - } - config = Aws::Client::ClientConfiguration(); - if (!s3_cred.region_.empty()) { - config.region = s3_cred.region_.c_str(); - } - } else if (!s3_cred.profile_name_.empty()) { - config = Aws::Client::ClientConfiguration(s3_cred.profile_name_.c_str()); - } else { - config = Aws::Client::ClientConfiguration("default"); - } - - // Cleanup extra slashes - std::string clean_path; - LOG_STATUS_ERROR(CleanPath(s3_path, &clean_path), "failed to parse S3 path"); - - std::string protocol, host_name, host_port, bucket, object; - if (RE2::FullMatch( - clean_path, s3_regex_, &protocol, &host_name, &host_port, &bucket, - &object)) { - config.endpointOverride = Aws::String(host_name + ":" + host_port); - if (protocol == "https://") { - config.scheme = Aws::Http::Scheme::HTTPS; - } else { - config.scheme = Aws::Http::Scheme::HTTP; - } - } - - if (!s3_cred.secret_key_.empty() && !s3_cred.key_id_.empty()) { - client_ = std::make_unique( - credentials, config, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - /*useVirtualAdressing*/ false); - } else { - client_ = std::make_unique( - config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - /*useVirtualAdressing*/ false); - } -} - -Status -S3FileSystem::CheckClient(const std::string& s3_path) -{ - std::string bucket, object_path; - RETURN_IF_ERROR(ParsePath(s3_path, &bucket, &object_path)); - // check if can connect to the bucket - s3::Model::HeadBucketRequest head_request; - head_request.WithBucket(bucket.c_str()); - if (!client_->HeadBucket(head_request).IsSuccess()) { - return Status( - Status::Code::INTERNAL, - "Unable to create S3 filesystem client. 
Check account credentials."); - } - return Status::Success; -} - -Status -S3FileSystem::FileExists(const std::string& path, bool* exists) -{ - *exists = false; - - // S3 doesn't make objects for directories, so it could still be a directory - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (is_dir) { - *exists = is_dir; - return Status::Success; - } - - std::string bucket, object; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object)); - - // Construct request for object metadata - s3::Model::HeadObjectRequest head_request; - head_request.SetBucket(bucket.c_str()); - head_request.SetKey(object.c_str()); - - auto head_object_outcome = client_->HeadObject(head_request); - if (!head_object_outcome.IsSuccess()) { - if (head_object_outcome.GetError().GetErrorType() != - s3::S3Errors::RESOURCE_NOT_FOUND) { - return Status( - Status::Code::INTERNAL, - "Could not get MetaData for object at " + path + - " due to exception: " + - head_object_outcome.GetError().GetExceptionName() + - ", error message: " + - head_object_outcome.GetError().GetMessage()); - } - } else { - *exists = true; - } - - return Status::Success; -} - -Status -S3FileSystem::IsDirectory(const std::string& path, bool* is_dir) -{ - *is_dir = false; - std::string bucket, object_path; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object_path)); - - // Check if the bucket exists - s3::Model::HeadBucketRequest head_request; - head_request.WithBucket(bucket.c_str()); - - auto head_bucket_outcome = client_->HeadBucket(head_request); - if (!head_bucket_outcome.IsSuccess()) { - return Status( - Status::Code::INTERNAL, - "Could not get MetaData for bucket with name " + bucket + - " due to exception: " + - head_bucket_outcome.GetError().GetExceptionName() + - ", error message: " + head_bucket_outcome.GetError().GetMessage()); - } - - // Root case - bucket exists and object path is empty - if (object_path.empty()) { - *is_dir = true; - return Status::Success; - } - - // List the objects in the bucket - s3::Model::ListObjectsRequest list_objects_request; - list_objects_request.SetBucket(bucket.c_str()); - list_objects_request.SetPrefix(AppendSlash(object_path).c_str()); - auto list_objects_outcome = client_->ListObjects(list_objects_request); - - if (list_objects_outcome.IsSuccess()) { - *is_dir = !list_objects_outcome.GetResult().GetContents().empty(); - } else { - return Status( - Status::Code::INTERNAL, - "Failed to list objects with prefix " + path + " due to exception: " + - list_objects_outcome.GetError().GetExceptionName() + - ", error message: " + list_objects_outcome.GetError().GetMessage()); - } - return Status::Success; -} - -Status -S3FileSystem::FileModificationTime(const std::string& path, int64_t* mtime_ns) -{ - // We don't need to worry about the case when this is a directory - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (is_dir) { - *mtime_ns = 0; - return Status::Success; - } - - std::string bucket, object; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object)); - - // Send a request for the objects metadata - s3::Model::HeadObjectRequest head_request; - head_request.SetBucket(bucket.c_str()); - head_request.SetKey(object.c_str()); - - // If request succeeds, copy over the modification time - auto head_object_outcome = client_->HeadObject(head_request); - if (head_object_outcome.IsSuccess()) { - *mtime_ns = head_object_outcome.GetResult().GetLastModified().Millis() * - NANOS_PER_MILLIS; - } else { - return Status( - Status::Code::INTERNAL, - "Failed to get modification time for object at " + path 
+ - " due to exception: " + - head_object_outcome.GetError().GetExceptionName() + - ", error message: " + head_object_outcome.GetError().GetMessage()); - } - return Status::Success; -} - -Status -S3FileSystem::GetDirectoryContents( - const std::string& path, std::set* contents) -{ - // Parse bucket and dir_path - std::string bucket, dir_path, full_dir; - RETURN_IF_ERROR(ParsePath(path, &bucket, &dir_path)); - std::string true_path = "s3://" + bucket + '/' + dir_path; - - // Capture the full path to facilitate content listing - full_dir = AppendSlash(dir_path); - - // Issue request for objects with prefix - s3::Model::ListObjectsRequest objects_request; - objects_request.SetBucket(bucket.c_str()); - objects_request.SetPrefix(full_dir.c_str()); - auto list_objects_outcome = client_->ListObjects(objects_request); - - if (list_objects_outcome.IsSuccess()) { - Aws::Vector object_list = - list_objects_outcome.GetResult().GetContents(); - for (auto const& s3_object : object_list) { - // In the case of empty directories, the directory itself will appear here - if (s3_object.GetKey().c_str() == full_dir) { - continue; - } - - // We have to make sure that subdirectory contents do not appear here - std::string name(s3_object.GetKey().c_str()); - int item_start = name.find(full_dir) + full_dir.size(); - // S3 response prepends parent directory name - int item_end = name.find("/", item_start); - - // Let set take care of subdirectory contents - std::string item = name.substr(item_start, item_end - item_start); - contents->insert(item); - } - } else { - return Status( - Status::Code::INTERNAL, - "Could not list contents of directory at " + true_path + - " due to exception: " + - list_objects_outcome.GetError().GetExceptionName() + - ", error message: " + list_objects_outcome.GetError().GetMessage()); - } - return Status::Success; -} - -Status -S3FileSystem::GetDirectorySubdirs( - const std::string& path, std::set* subdirs) -{ - // Parse bucket and dir_path - std::string bucket, dir_path; - RETURN_IF_ERROR(ParsePath(path, &bucket, &dir_path)); - std::string true_path = "s3://" + bucket + '/' + dir_path; - - RETURN_IF_ERROR(GetDirectoryContents(true_path, subdirs)); - - // Erase non-directory entries... - for (auto iter = subdirs->begin(); iter != subdirs->end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({true_path, *iter}), &is_dir)); - if (!is_dir) { - iter = subdirs->erase(iter); - } else { - ++iter; - } - } - - return Status::Success; -} -Status -S3FileSystem::GetDirectoryFiles( - const std::string& path, std::set* files) -{ - // Parse bucket and dir_path - std::string bucket, dir_path; - RETURN_IF_ERROR(ParsePath(path, &bucket, &dir_path)); - std::string true_path = "s3://" + bucket + '/' + dir_path; - RETURN_IF_ERROR(GetDirectoryContents(true_path, files)); - - // Erase directory entries... 
- for (auto iter = files->begin(); iter != files->end();) { - bool is_dir; - RETURN_IF_ERROR(IsDirectory(JoinPath({true_path, *iter}), &is_dir)); - if (is_dir) { - iter = files->erase(iter); - } else { - ++iter; - } - } - - return Status::Success; -} - -Status -S3FileSystem::ReadTextFile(const std::string& path, std::string* contents) -{ - bool exists; - RETURN_IF_ERROR(FileExists(path, &exists)); - - if (!exists) { - return Status(Status::Code::INTERNAL, "File does not exist at " + path); - } - - std::string bucket, object; - RETURN_IF_ERROR(ParsePath(path, &bucket, &object)); - - // Send a request for the objects metadata - s3::Model::GetObjectRequest object_request; - object_request.SetBucket(bucket.c_str()); - object_request.SetKey(object.c_str()); - - auto get_object_outcome = client_->GetObject(object_request); - if (get_object_outcome.IsSuccess()) { - auto& object_result = get_object_outcome.GetResultWithOwnership().GetBody(); - - std::string data = ""; - char c; - while (object_result.get(c)) { - data += c; - } - - *contents = data; - } else { - return Status( - Status::Code::INTERNAL, - "Failed to get object at " + path + " due to exception: " + - get_object_outcome.GetError().GetExceptionName() + - ", error message: " + get_object_outcome.GetError().GetMessage()); - } - - return Status::Success; -} - -Status -S3FileSystem::LocalizePath( - const std::string& path, std::shared_ptr* localized) -{ - // Check if the directory or file exists - bool exists; - RETURN_IF_ERROR(FileExists(path, &exists)); - if (!exists) { - return Status( - Status::Code::INTERNAL, "directory or file does not exist at " + path); - } - - // Cleanup extra slashes - std::string clean_path; - RETURN_IF_ERROR(CleanPath(path, &clean_path)); - - // Remove protocol and host name and port - std::string effective_path, protocol, host_name, host_port, bucket, object; - if (RE2::FullMatch( - clean_path, s3_regex_, &protocol, &host_name, &host_port, &bucket, - &object)) { - effective_path = "s3://" + bucket + object; - } else { - effective_path = path; - } - - // Create temporary directory - std::string tmp_folder; - RETURN_IF_ERROR( - triton::core::MakeTemporaryDirectory(FileSystemType::LOCAL, &tmp_folder)); - - // Specify contents to be downloaded - std::set contents; - bool is_dir; - RETURN_IF_ERROR(IsDirectory(path, &is_dir)); - if (is_dir) { - // Set localized path - localized->reset(new LocalizedPath(effective_path, tmp_folder)); - // Specify the entire directory to be downloaded - std::set filenames; - RETURN_IF_ERROR(GetDirectoryContents(effective_path, &filenames)); - for (auto itr = filenames.begin(); itr != filenames.end(); ++itr) { - contents.insert(JoinPath({effective_path, *itr})); - } - } else { - // Set localized path - std::string filename = - effective_path.substr(effective_path.find_last_of('/') + 1); - localized->reset( - new LocalizedPath(effective_path, JoinPath({tmp_folder, filename}))); - // Specify only the file to be downloaded - contents.insert(effective_path); - } - - // Download all specified contents and nested contents - while (contents.size() != 0) { - std::set tmp_contents = contents; - contents.clear(); - for (auto iter = tmp_contents.begin(); iter != tmp_contents.end(); ++iter) { - std::string s3_fpath = *iter; - std::string s3_removed_path = s3_fpath.substr(effective_path.size()); - std::string local_fpath = - s3_removed_path.empty() - ? 
(*localized)->Path() - : JoinPath({(*localized)->Path(), s3_removed_path}); - bool is_subdir; - RETURN_IF_ERROR(IsDirectory(s3_fpath, &is_subdir)); - if (is_subdir) { - // Create local mirror of sub-directories -#ifdef _WIN32 - int status = mkdir(const_cast(local_fpath.c_str())); -#else - int status = mkdir( - const_cast(local_fpath.c_str()), - S_IRUSR | S_IWUSR | S_IXUSR); -#endif - if (status == -1) { - return Status( - Status::Code::INTERNAL, - "Failed to create local folder: " + local_fpath + - ", errno:" + strerror(errno)); - } - - // Add sub-directories and deeper files to contents - std::set subdir_contents; - RETURN_IF_ERROR(GetDirectoryContents(s3_fpath, &subdir_contents)); - for (auto itr = subdir_contents.begin(); itr != subdir_contents.end(); - ++itr) { - contents.insert(JoinPath({s3_fpath, *itr})); - } - } else { - // Create local copy of file - std::string file_bucket, file_object; - RETURN_IF_ERROR(ParsePath(s3_fpath, &file_bucket, &file_object)); - - s3::Model::GetObjectRequest object_request; - object_request.SetBucket(file_bucket.c_str()); - object_request.SetKey(file_object.c_str()); - - auto get_object_outcome = client_->GetObject(object_request); - if (get_object_outcome.IsSuccess()) { - auto& retrieved_file = - get_object_outcome.GetResultWithOwnership().GetBody(); - std::ofstream output_file(local_fpath.c_str(), std::ios::binary); - output_file << retrieved_file.rdbuf(); - output_file.close(); - } else { - return Status( - Status::Code::INTERNAL, - "Failed to get object at " + s3_fpath + " due to exception: " + - get_object_outcome.GetError().GetExceptionName() + - ", error message: " + - get_object_outcome.GetError().GetMessage()); - } - } - } - } - - return Status::Success; -} - -Status -S3FileSystem::WriteTextFile( - const std::string& path, const std::string& contents) -{ - return Status( - Status::Code::UNSUPPORTED, - "Write text file operation not yet implemented " + path); -} - -Status -S3FileSystem::WriteBinaryFile( - const std::string& path, const char* contents, const size_t content_len) -{ - return Status( - Status::Code::UNSUPPORTED, - "Write text file operation not yet implemented " + path); -} - -Status -S3FileSystem::MakeDirectory(const std::string& dir, const bool recursive) -{ - return Status( - Status::Code::UNSUPPORTED, - "Make directory operation not yet implemented"); -} - -Status -S3FileSystem::MakeTemporaryDirectory(std::string* temp_dir) -{ - return Status( - Status::Code::UNSUPPORTED, - "Make temporary directory operation not yet implemented"); -} - -Status -S3FileSystem::DeletePath(const std::string& path) -{ - return Status( - Status::Code::UNSUPPORTED, "Delete path operation not yet implemented"); -} - - -#endif // TRITON_ENABLE_S3 - - -class FileSystemManager { - public: - Status GetFileSystem( - const std::string& path, std::shared_ptr& file_system); - Status GetFileSystem( - FileSystemType type, std::shared_ptr& file_system); - FileSystemManager(); - - private: - template - Status GetFileSystem( - const std::string& path, CacheType& cache, - std::shared_ptr& file_system); - template - Status ReturnErrorOrReload( - const Status& load_status, const Status& error_status, - const std::string& path, CacheType& cache, - std::shared_ptr& file_system); - Status LoadCredentials(bool flush_cache = false); - template - static void LoadCredential( - triton::common::TritonJson::Value& creds_json, const char* fs_type, - CacheType& cache); - template - static void SortCache( - std::vector>>& - cache); - template - static Status 
GetLongestMatchingNameIndex( - const std::vector>>& cache, - const std::string& path, size_t& idx); - - std::shared_ptr local_fs_; - std::mutex mu_; // protect concurrent access into variables - bool is_cached_; // if name and credential is cached, lazy load file system - // cloud credential cache should be sorted in descending name length order - // [(name_long, credential, file_system), (name, ...)] -#ifdef TRITON_ENABLE_GCS - std::vector< - std::tuple>> - gs_cache_; -#endif // TRITON_ENABLE_GCS -#ifdef TRITON_ENABLE_S3 - std::vector< - std::tuple>> - s3_cache_; -#endif // TRITON_ENABLE_S3 -#ifdef TRITON_ENABLE_AZURE_STORAGE - std::vector< - std::tuple>> - as_cache_; -#endif // TRITON_ENABLE_AZURE_STORAGE -}; - -FileSystemManager::FileSystemManager() - : local_fs_(new LocalFileSystem()), is_cached_(false) -{ -} - -Status -FileSystemManager::GetFileSystem( - const std::string& path, std::shared_ptr& file_system) -{ - // Check if this is a GCS path (gs://$BUCKET_NAME) - if (!path.empty() && !path.rfind("gs://", 0)) { -#ifndef TRITON_ENABLE_GCS - return Status( - Status::Code::INTERNAL, - "gs:// file-system not supported. To enable, build with " - "-DTRITON_ENABLE_GCS=ON."); -#else - return GetFileSystem< - std::vector>>, - GCSCredential, GCSFileSystem>(path, gs_cache_, file_system); -#endif // TRITON_ENABLE_GCS - } - - // Check if this is an S3 path (s3://$BUCKET_NAME) - if (!path.empty() && !path.rfind("s3://", 0)) { -#ifndef TRITON_ENABLE_S3 - return Status( - Status::Code::INTERNAL, - "s3:// file-system not supported. To enable, build with " - "-DTRITON_ENABLE_S3=ON."); -#else - return GetFileSystem< - std::vector>>, - S3Credential, S3FileSystem>(path, s3_cache_, file_system); -#endif // TRITON_ENABLE_S3 - } - - // Check if this is an Azure Storage path - if (!path.empty() && !path.rfind("as://", 0)) { -#ifndef TRITON_ENABLE_AZURE_STORAGE - return Status( - Status::Code::INTERNAL, - "as:// file-system not supported. 
To enable, build with " - "-DTRITON_ENABLE_AZURE_STORAGE=ON."); -#else - return GetFileSystem< - std::vector>>, - ASCredential, ASFileSystem>(path, as_cache_, file_system); -#endif // TRITON_ENABLE_AZURE_STORAGE - } - - // Assume path is for local filesystem - file_system = local_fs_; - return Status::Success; -} - -Status -FileSystemManager::GetFileSystem( - FileSystemType type, std::shared_ptr& file_system) -{ - // only LOCAL and GCS are not path-dependent and can be accessed by type - switch (type) { - case FileSystemType::LOCAL: - return GetFileSystem("", file_system); - case FileSystemType::GCS: - return GetFileSystem("gs://", file_system); - case FileSystemType::S3: - return Status( - Status::Code::UNSUPPORTED, - "S3 filesystem cannot be accessed by type"); - case FileSystemType::AS: - return Status( - Status::Code::UNSUPPORTED, - "AS filesystem cannot be accessed by type"); - default: - return Status(Status::Code::UNSUPPORTED, "Unsupported filesystem type"); - } -} - -template -Status -FileSystemManager::GetFileSystem( - const std::string& path, CacheType& cache, - std::shared_ptr& file_system) -{ - const Status& cred_status = LoadCredentials(); - if (cred_status.IsOk() || - cred_status.StatusCode() == Status::Code::ALREADY_EXISTS) { - // Find credential - size_t idx; - const Status& match_status = GetLongestMatchingNameIndex(cache, path, idx); - if (!match_status.IsOk()) { - return ReturnErrorOrReload( - cred_status, match_status, path, cache, file_system); - } - // Find or lazy load file system - std::shared_ptr fs = std::get<2>(cache[idx]); - if (fs == nullptr) { - std::string cred_name = std::get<0>(cache[idx]); - CredentialType cred = std::get<1>(cache[idx]); - fs = std::make_shared(path, cred); - cache[idx] = std::make_tuple(cred_name, cred, fs); - } - // Check client - const Status& client_status = fs->CheckClient(path); - if (!client_status.IsOk()) { - return ReturnErrorOrReload( - cred_status, client_status, path, cache, file_system); - } - // Return client - file_system = fs; - return Status::Success; - } - return cred_status; -} - -template -Status -FileSystemManager::ReturnErrorOrReload( - const Status& load_status, const Status& error_status, - const std::string& path, CacheType& cache, - std::shared_ptr& file_system) -{ - if (load_status.StatusCode() == Status::Code::ALREADY_EXISTS) { - return error_status; - } - LoadCredentials(true); // flush cache - return GetFileSystem( - path, cache, file_system); -} - -// return status meaning: -// - SUCCESS, "" -> loaded credential from file -// - ALREADY_EXISTS, "Cached" -> credential already loaded -Status -FileSystemManager::LoadCredentials(bool flush_cache) -{ - // prevent concurrent access into class variables - std::lock_guard lock(mu_); - - // check if credential is already cached - if (is_cached_ && !flush_cache) { - return Status(Status::Code::ALREADY_EXISTS, "Cached"); - } - - const char* file_path_c_str = std::getenv("TRITON_CLOUD_CREDENTIAL_PATH"); - if (file_path_c_str != nullptr) { - // Load from credential file - std::string file_path = std::string(file_path_c_str); - LOG_VERBOSE(1) << "Reading cloud credential from " << file_path; - - triton::common::TritonJson::Value creds_json; - std::string cred_file_content; - RETURN_IF_ERROR(local_fs_->ReadTextFile(file_path, &cred_file_content)); - RETURN_IF_ERROR(creds_json.Parse(cred_file_content)); - -#ifdef TRITON_ENABLE_GCS - // load GCS credentials - LoadCredential< - std::vector>>, - GCSCredential, GCSFileSystem>(creds_json, "gs", gs_cache_); -#endif // 
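
// Illustrative sketch, not part of the original source: the scheme checks in
// GetFileSystem() above use std::string::rfind(prefix, 0), which only looks at
// position 0, so "!path.rfind("s3://", 0)" simply means "path starts with
// s3://". The paths below are made-up examples.
#include <cassert>
#include <string>

static bool
HasScheme(const std::string& path, const std::string& scheme)
{
  // rfind(scheme, 0) is 0 when 'path' begins with 'scheme', npos otherwise.
  return path.rfind(scheme, 0) == 0;
}

int main()
{
  assert(HasScheme("s3://bucket/model_repo", "s3://"));
  assert(HasScheme("gs://bucket/model_repo", "gs://"));
  assert(!HasScheme("/opt/tritonserver/models", "s3://"));
  return 0;
}
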
TRITON_ENABLE_GCS -#ifdef TRITON_ENABLE_S3 - // load S3 credentials - LoadCredential< - std::vector>>, - S3Credential, S3FileSystem>(creds_json, "s3", s3_cache_); -#endif // TRITON_ENABLE_S3 -#ifdef TRITON_ENABLE_AZURE_STORAGE - // load AS credentials - LoadCredential< - std::vector>>, - ASCredential, ASFileSystem>(creds_json, "as", as_cache_); -#endif // TRITON_ENABLE_AZURE_STORAGE - } else { - // Load from environment variables - LOG_VERBOSE(1) << "TRITON_CLOUD_CREDENTIAL_PATH environment variable is " - "not set, reading from environment variables"; - -#ifdef TRITON_ENABLE_GCS - // load GCS credentials - gs_cache_.clear(); - gs_cache_.push_back( - std::make_tuple("", GCSCredential(), std::shared_ptr())); -#endif // TRITON_ENABLE_GCS - -#ifdef TRITON_ENABLE_S3 - // load S3 credentials - s3_cache_.clear(); - s3_cache_.push_back( - std::make_tuple("", S3Credential(), std::shared_ptr())); -#endif // TRITON_ENABLE_S3 - -#ifdef TRITON_ENABLE_AZURE_STORAGE - // load AS credentials - as_cache_.clear(); - as_cache_.push_back( - std::make_tuple("", ASCredential(), std::shared_ptr())); -#endif // TRITON_ENABLE_AZURE_STORAGE - } - - is_cached_ = true; - return Status::Success; -} - -template -void -FileSystemManager::LoadCredential( - triton::common::TritonJson::Value& creds_json, const char* fs_type, - CacheType& cache) -{ - cache.clear(); - triton::common::TritonJson::Value creds_fs_json; - if (creds_json.Find(fs_type, &creds_fs_json)) { - std::vector cred_names; - creds_fs_json.Members(&cred_names); - for (size_t i = 0; i < cred_names.size(); i++) { - std::string cred_name = cred_names[i]; - triton::common::TritonJson::Value cred_json; - creds_fs_json.Find(cred_name.c_str(), &cred_json); - cache.push_back(std::make_tuple( - cred_name, CredentialType(cred_json), - std::shared_ptr())); - } - SortCache(cache); - } -} - -template -void -FileSystemManager::SortCache( - std::vector>>& cache) -{ - std::sort( - cache.begin(), cache.end(), - [](std::tuple< - std::string, CredentialType, std::shared_ptr> - a, - std::tuple< - std::string, CredentialType, std::shared_ptr> - b) { return std::get<0>(a).size() >= std::get<0>(b).size(); }); -} - -template -Status -FileSystemManager::GetLongestMatchingNameIndex( - const std::vector>>& cache, - const std::string& path, size_t& idx) -{ - for (size_t i = 0; i < cache.size(); i++) { - if (!path.rfind(std::get<0>(cache[i]), 0)) { - idx = i; - LOG_VERBOSE(1) << "Using credential " + std::get<0>(cache[i]) + - " for path " + path; - return Status::Success; - } - } - return Status( - Status::Code::NOT_FOUND, "Cannot match credential for path " + path); -} - -static FileSystemManager fsm_; - -} // namespace - -// FIXME: Windows support '/'? 
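
// Illustrative sketch, not part of the original source: how SortCache() and
// GetLongestMatchingNameIndex() above combine so that the longest credential
// name that prefixes a path wins, with the empty name acting as the fallback.
// The credential names and ids are made-up examples.
#include <algorithm>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main()
{
  // (credential name, credential id) pairs as they might sit in the cache.
  std::vector<std::pair<std::string, std::string>> cache = {
      {"", "default-cred"},
      {"s3://bucket", "bucket-cred"},
      {"s3://bucket/private", "private-cred"}};

  // Mirror SortCache(): longest names first, so the first prefix hit is the
  // longest match.
  std::sort(cache.begin(), cache.end(), [](const auto& a, const auto& b) {
    return a.first.size() > b.first.size();
  });

  const std::string path = "s3://bucket/private/model_repo";
  for (const auto& entry : cache) {
    if (path.rfind(entry.first, 0) == 0) {  // entry name is a prefix of path
      std::cout << "Using credential " << entry.second << " for path " << path
                << "\n";  // picks private-cred, not bucket-cred or the default
      break;
    }
  }
  return 0;
}
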
If so, the below doesn't need to change -bool -IsAbsolutePath(const std::string& path) -{ - return !path.empty() && (path[0] == '/'); -} - -std::string -JoinPath(std::initializer_list segments) -{ - std::string joined; - - for (const auto& seg : segments) { - if (joined.empty()) { - joined = seg; - } else if (IsAbsolutePath(seg)) { - if (joined[joined.size() - 1] == '/') { - joined.append(seg.substr(1)); - } else { - joined.append(seg); - } - } else { // !IsAbsolutePath(seg) - if (joined[joined.size() - 1] != '/') { - joined.append("/"); - } - joined.append(seg); - } - } - - return joined; -} - -std::string -BaseName(const std::string& path) -{ - if (path.empty()) { - return path; - } - - size_t last = path.size() - 1; - while ((last > 0) && (path[last] == '/')) { - last -= 1; - } - - if (path[last] == '/') { - return std::string(); - } - - const size_t idx = path.find_last_of("/", last); - if (idx == std::string::npos) { - return path.substr(0, last + 1); - } - - return path.substr(idx + 1, last - idx); -} - -std::string -DirName(const std::string& path) -{ - if (path.empty()) { - return path; - } - - size_t last = path.size() - 1; - while ((last > 0) && (path[last] == '/')) { - last -= 1; - } - - if (path[last] == '/') { - return std::string("/"); - } - - const size_t idx = path.find_last_of("/", last); - if (idx == std::string::npos) { - return std::string("."); - } - if (idx == 0) { - return std::string("/"); - } - - return path.substr(0, idx); -} - -Status -FileExists(const std::string& path, bool* exists) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->FileExists(path, exists); -} - -Status -IsDirectory(const std::string& path, bool* is_dir) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->IsDirectory(path, is_dir); -} - -Status -FileModificationTime(const std::string& path, int64_t* mtime_ns) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->FileModificationTime(path, mtime_ns); -} - -Status -GetDirectoryContents(const std::string& path, std::set* contents) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->GetDirectoryContents(path, contents); -} - -Status -GetDirectorySubdirs(const std::string& path, std::set* subdirs) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->GetDirectorySubdirs(path, subdirs); -} - -Status -GetDirectoryFiles( - const std::string& path, const bool skip_hidden_files, - std::set* files) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - std::set all_files; - RETURN_IF_ERROR(fs->GetDirectoryFiles(path, &all_files)); - // Remove the hidden files - for (auto f : all_files) { - if ((f[0] != '.') || (!skip_hidden_files)) { - files->insert(f); - } - } - return Status::Success; -} - -Status -ReadTextFile(const std::string& path, std::string* contents) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->ReadTextFile(path, contents); -} - -Status -ReadTextProto(const std::string& path, google::protobuf::Message* msg) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - - std::string contents; - RETURN_IF_ERROR(fs->ReadTextFile(path, &contents)); - - if (!google::protobuf::TextFormat::ParseFromString(contents, msg)) { - return Status( - Status::Code::INTERNAL, "failed to read text proto from " + path); - } - - return Status::Success; -} - -Status -LocalizePath(const std::string& path, std::shared_ptr* 
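
// Illustrative sketch, not part of the original source: expected behaviour of
// the path helpers defined above, assuming JoinPath(), BaseName() and
// DirName() are in scope. The sample paths are made up.
#include <cassert>
#include <string>

void
PathHelperExamples()
{
  assert(
      JoinPath({"gs://bucket", "models", "resnet"}) ==
      "gs://bucket/models/resnet");
  assert(JoinPath({"/opt/", "/abs"}) == "/opt/abs");  // one '/' kept at the join
  assert(BaseName("/models/resnet/1/") == "1");       // trailing slashes ignored
  assert(DirName("/models/resnet/1") == "/models/resnet");
  assert(DirName("model.plan") == ".");               // no '/', current directory
}
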
localized) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->LocalizePath(path, localized); -} - -Status -WriteTextProto(const std::string& path, const google::protobuf::Message& msg) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - - std::string prototxt; - if (!google::protobuf::TextFormat::PrintToString(msg, &prototxt)) { - return Status( - Status::Code::INTERNAL, "failed to write text proto to " + path); - } - - return fs->WriteTextFile(path, prototxt); -} - -Status -WriteBinaryFile( - const std::string& path, const char* contents, const size_t content_len) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->WriteBinaryFile(path, contents, content_len); -} - -Status -ReadBinaryProto(const std::string& path, google::protobuf::MessageLite* msg) -{ - std::string msg_str; - RETURN_IF_ERROR(ReadTextFile(path, &msg_str)); - - google::protobuf::io::CodedInputStream coded_stream( - reinterpret_cast(msg_str.c_str()), msg_str.size()); - coded_stream.SetTotalBytesLimit(INT_MAX); - if (!msg->ParseFromCodedStream(&coded_stream)) { - return Status( - Status::Code::INTERNAL, "Can't parse " + path + " as binary proto"); - } - - return Status::Success; -} - -Status -MakeDirectory(const std::string& dir, const bool recursive) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(dir, fs)); - return fs->MakeDirectory(dir, recursive); -} - -Status -MakeTemporaryDirectory(const FileSystemType type, std::string* temp_dir) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(type, fs)); - return fs->MakeTemporaryDirectory(temp_dir); -} - -Status -DeletePath(const std::string& path) -{ - std::shared_ptr fs; - RETURN_IF_ERROR(fsm_.GetFileSystem(path, fs)); - return fs->DeletePath(path); -} - -Status -GetFileSystemType(const std::string& path, FileSystemType* type) -{ - if (path.empty()) { - return Status( - Status::Code::INVALID_ARG, - "Can not infer filesystem type from empty path"); - } -#ifdef TRITON_ENABLE_GCS - // Check if this is a GCS path (gs://$BUCKET_NAME) - if (!path.rfind("gs://", 0)) { - *type = FileSystemType::GCS; - return Status::Success; - } -#endif // TRITON_ENABLE_GCS - -#ifdef TRITON_ENABLE_S3 - // Check if this is an S3 path (s3://$BUCKET_NAME) - if (!path.rfind("s3://", 0)) { - *type = FileSystemType::S3; - return Status::Success; - } -#endif // TRITON_ENABLE_S3 - -#ifdef TRITON_ENABLE_AZURE_STORAGE - // Check if this is an Azure Storage path - if (!path.rfind("as://", 0)) { - *type = FileSystemType::AS; - return Status::Success; - } -#endif // TRITON_ENABLE_AZURE_STORAGE - - // Assume path is for local filesystem - *type = FileSystemType::LOCAL; - return Status::Success; -} - -const std::string& -FileSystemTypeString(const FileSystemType type) -{ - static const std::string local_str("LOCAL"); - static const std::string gcs_str("GCS"); - static const std::string s3_str("S3"); - static const std::string as_str("AS"); - static const std::string unknown_str("UNKNOWN"); - switch (type) { - case FileSystemType::LOCAL: - return local_str; - case FileSystemType::GCS: - return gcs_str; - case FileSystemType::S3: - return s3_str; - case FileSystemType::AS: - return as_str; - default: - return unknown_str; - } -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/filesystem.h b/3rdparty/core-r22.12/src/filesystem.h deleted file mode 100644 index 439e42570c1d635725c2ec7f1ee6e0a62ae41673..0000000000000000000000000000000000000000 --- 
a/3rdparty/core-r22.12/src/filesystem.h +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#ifdef _WIN32 -// Remove GetObject definition from windows.h, which can cause -// a naming collision when GetObject is called. -// https://github.com/Tencent/rapidjson/issues/1448 -#undef GetObject -#endif // _WIN32 - -#include -#include "google/protobuf/message.h" -#include "status.h" - -namespace triton { namespace core { - -enum class FileSystemType { LOCAL, GCS, S3, AS }; - -// This class stores the paths of local temporary files needed for loading -// models from Cloud repositories and performs necessary cleanup after the -// models are loaded. -class LocalizedPath { - public: - // Create an object for a path that is already local. - LocalizedPath(const std::string& original_path) - : original_path_(original_path) - { - } - - // Create an object for a remote path. Store both the original path and the - // temporary local path. - LocalizedPath( - const std::string& original_path, const std::string& local_path) - : original_path_(original_path), local_path_(local_path) - { - } - - // Destructor. Remove temporary local storage associated with the object. - // If the local path is a directory, delete the directory. - // If the local path is a file, delete the directory containing the file. - ~LocalizedPath(); - - // Return the localized path represented by this object. - const std::string& Path() const - { - return (local_path_.empty()) ? original_path_ : local_path_; - } - - // Maintain a vector of LocalizedPath that should be kept available in the - // tmp directory for the lifetime of this object - // FIXME: Remove when no longer required - std::vector> other_localized_path; - - private: - std::string original_path_; - std::string local_path_; -}; - -/// Is a path an absolute path? -/// \param path The path. -/// \return true if absolute path, false if relative path. 
-bool IsAbsolutePath(const std::string& path);
-
-/// Join path segments into a longer path
-/// \param segments The path segments.
-/// \return the path formed by joining the segments.
-std::string JoinPath(std::initializer_list<std::string> segments);
-
-/// Get the basename of a path.
-/// \param path The path.
-/// \return the last segment of the path.
-std::string BaseName(const std::string& path);
-
-/// Get the dirname of a path.
-/// \param path The path.
-/// \return all but the last segment of the path.
-std::string DirName(const std::string& path);
-
-/// Does a file or directory exist?
-/// \param path The path to check for existance.
-/// \param exists Returns true if file/dir exists
-/// \return Error status if unable to perform the check
-Status FileExists(const std::string& path, bool* exists);
-
-/// Is a path a directory?
-/// \param path The path to check.
-/// \param is_dir Returns true if path represents a directory
-/// \return Error status
-Status IsDirectory(const std::string& path, bool* is_dir);
-
-/// Get file modification time in nanoseconds.
-/// A file is considered modified in Triton when its binary content has changed
-/// including the action of replacing it with another file.
-/// \param path The path.
-/// \param mtime_ns Returns the file modification time. For some filesystems a
-/// file/folder may not have a modification time, in that case return 0.
-/// \return Error status
-Status FileModificationTime(const std::string& path, int64_t* mtime_ns);
-
-/// Get the contents of a directory.
-/// \param path The directory path.
-/// \param subdirs Returns the directory contents.
-/// \return Error status
-Status GetDirectoryContents(
-    const std::string& path, std::set<std::string>* contents);
-
-/// Get the sub-directories of a path.
-/// \param path The path.
-/// \param subdirs Returns the names of the sub-directories.
-/// \return Error status
-Status GetDirectorySubdirs(
-    const std::string& path, std::set<std::string>* subdirs);
-
-/// Get the files contained in a directory.
-/// \param path The directory.
-/// \param skip_hidden_files Ignores the hidden files in the directory.
-/// \param files Returns the names of the files.
-/// \return Error status
-Status GetDirectoryFiles(
-    const std::string& path, const bool skip_hidden_files,
-    std::set<std::string>* files);
-
-/// Read a text file into a string.
-/// \param path The path of the file.
-/// \param contents Returns the contents of the file.
-/// \return Error status
-Status ReadTextFile(const std::string& path, std::string* contents);
-
-/// Create an object representing a local copy of a path.
-/// \param path The path of the directory or file.
-/// \param localized Returns the LocalizedPath object
-/// representing the local copy of the path.
-/// \return Error status
-Status LocalizePath(
-    const std::string& path, std::shared_ptr<LocalizedPath>* localized);
-
-/// Write a string to a file.
-/// \param path The path of the file.
-/// \param contents The contents to write to the file.
-/// \return Error status
-Status WriteTextFile(const std::string& path, const std::string& contents);
-
-/// Write binary to a file.
-/// \param path The path of the file.
-/// \param contents The contents to write to the file.
-/// \param content_len The size of the content.
-/// \return Error status
-Status WriteBinaryFile(
-    const std::string& path, const char* contents, const size_t content_len);
-
-/// Read a prototext file.
-/// \param path The path of the file.
-/// \param msg Returns the protobuf message for the file.
-/// \return Error status -Status ReadTextProto(const std::string& path, google::protobuf::Message* msg); - -/// Write a prototext file. -/// \param path The path of the file. -/// \param msg The protobuf to write. -/// \return Error status -Status WriteTextProto( - const std::string& path, const google::protobuf::Message& msg); - -/// Read a binary protobuf file. -/// \param path The path of the file. -/// \param msg Returns the protobuf message for the file. -/// \return Error status -Status ReadBinaryProto( - const std::string& path, google::protobuf::MessageLite* msg); - -/// Create a directory of the specified path. -/// \param dir The path to the directory. -/// \param recursive Whether the parent directories will be created -/// if not exist. -/// \return Error status if the directory can't be created -Status MakeDirectory(const std::string& dir, const bool recursive); - -/// Create a temporary directory of the specified filesystem type. -/// \param type The type of the filesystem. -/// \param temp_dir Returns the path to the temporary directory. -/// \return Error status -Status MakeTemporaryDirectory(const FileSystemType type, std::string* temp_dir); - -/// Delete a path. -/// \param path The path to the directory or file. -/// \return Error status -Status DeletePath(const std::string& path); - -/// Infer the filesystem type from the given path. -/// \param path The path to infer the filesystem type from. -/// \param type Returns the filesystem type of the path. -/// \return Error status -Status GetFileSystemType(const std::string& path, FileSystemType* type); - -/// Return the string representation of the filesystem type. -/// \param type The filesystem type. -/// \return The string representation of the type. -const std::string& FileSystemTypeString(const FileSystemType type); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_parameter.cc b/3rdparty/core-r22.12/src/infer_parameter.cc deleted file mode 100644 index 49f8c494a30d953724223d2c1ed8c5fabafa2945..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_parameter.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "infer_parameter.h" - -namespace triton { namespace core { - - -const void* -InferenceParameter::ValuePointer() const -{ - switch (type_) { - case TRITONSERVER_PARAMETER_STRING: - return reinterpret_cast(value_string_.c_str()); - case TRITONSERVER_PARAMETER_INT: - return reinterpret_cast(&value_int64_); - case TRITONSERVER_PARAMETER_BOOL: - return reinterpret_cast(&value_bool_); - case TRITONSERVER_PARAMETER_BYTES: - return reinterpret_cast(value_bytes_); - default: - break; - } - - return nullptr; -} - -std::ostream& -operator<<(std::ostream& out, const InferenceParameter& parameter) -{ - out << "[0x" << std::addressof(parameter) << "] " - << "name: " << parameter.Name() - << ", type: " << TRITONSERVER_ParameterTypeString(parameter.Type()) - << ", value: "; - return out; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_parameter.h b/3rdparty/core-r22.12/src/infer_parameter.h deleted file mode 100644 index 0e5b758016ace701d98b782b3cfcb8dfb4c2ec1f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_parameter.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -// -// An inference parameter. 
-// -class InferenceParameter { - public: - InferenceParameter(const char* name, const char* value) - : name_(name), type_(TRITONSERVER_PARAMETER_STRING), value_string_(value) - { - byte_size_ = value_string_.size(); - } - - InferenceParameter(const char* name, const int64_t value) - : name_(name), type_(TRITONSERVER_PARAMETER_INT), value_int64_(value), - byte_size_(sizeof(int64_t)) - { - } - - InferenceParameter(const char* name, const bool value) - : name_(name), type_(TRITONSERVER_PARAMETER_BOOL), value_bool_(value), - byte_size_(sizeof(bool)) - { - } - - InferenceParameter(const char* name, const void* ptr, const uint64_t size) - : name_(name), type_(TRITONSERVER_PARAMETER_BYTES), value_bytes_(ptr), - byte_size_(size) - { - } - - // The name of the parameter. - const std::string& Name() const { return name_; } - - // Data type of the parameter. - TRITONSERVER_ParameterType Type() const { return type_; } - - // Return a pointer to the parameter, or a pointer to the data content - // if type_ is TRITONSERVER_PARAMETER_BYTES. This returned pointer must be - // cast correctly based on 'type_'. - // TRITONSERVER_PARAMETER_STRING -> const char* - // TRITONSERVER_PARAMETER_INT -> int64_t* - // TRITONSERVER_PARAMETER_BOOL -> bool* - // TRITONSERVER_PARAMETER_BYTES -> const void* - const void* ValuePointer() const; - - // Return the data byte size of the parameter. - uint64_t ValueByteSize() const { return byte_size_; } - - // Return the parameter value string, the return value is valid only if - // Type() returns TRITONSERVER_PARAMETER_STRING - const std::string& ValueString() const { return value_string_; } - - private: - friend std::ostream& operator<<( - std::ostream& out, const InferenceParameter& parameter); - - std::string name_; - TRITONSERVER_ParameterType type_; - - std::string value_string_; - int64_t value_int64_; - bool value_bool_; - const void* value_bytes_; - uint64_t byte_size_; -}; - -std::ostream& operator<<( - std::ostream& out, const InferenceParameter& parameter); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_request.cc b/3rdparty/core-r22.12/src/infer_request.cc deleted file mode 100644 index 149fe84527d290961c37cb2c50a49fc2778cd972..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_request.cc +++ /dev/null @@ -1,1498 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
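
// Illustrative sketch, not part of the original source: constructing
// InferenceParameter objects of each basic type and reading them back through
// ValuePointer() with the casts documented above. The parameter names and
// values are made-up examples.
#include <cassert>
#include <cstdint>
#include <cstring>

void
InferenceParameterExamples()
{
  triton::core::InferenceParameter str_param("priority", "HIGH");
  triton::core::InferenceParameter int_param(
      "timeout_us", static_cast<int64_t>(500));
  triton::core::InferenceParameter bool_param("sequence_start", true);

  // TRITONSERVER_PARAMETER_STRING -> const char*
  assert(
      std::strcmp(
          reinterpret_cast<const char*>(str_param.ValuePointer()), "HIGH") == 0);
  // TRITONSERVER_PARAMETER_INT -> int64_t*
  assert(*reinterpret_cast<const int64_t*>(int_param.ValuePointer()) == 500);
  // TRITONSERVER_PARAMETER_BOOL -> bool*
  assert(*reinterpret_cast<const bool*>(bool_param.ValuePointer()));
}
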
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "infer_request.h" - -#include -#include -#include "model.h" -#include "model_config_utils.h" -#include "server.h" -#include "triton/common/logging.h" -#ifdef TRITON_ENABLE_TRACING -#include "cuda_utils.h" -#endif // TRITON_ENABLE_TRACING - -namespace triton { namespace core { - -namespace { - -// Utilities for Null request feature. -TRITONSERVER_Error* -NullResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id) -{ - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "unexpected allocation for null request, no output should be requested."); -} - -TRITONSERVER_Error* -NullResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "unexpected release for null request, no output should be requested."); -} - -ResponseAllocator null_allocator = ResponseAllocator( - NullResponseAlloc, NullResponseRelease, nullptr /* start_fn */); - -void -NullResponseComplete( - TRITONSERVER_InferenceResponse* iresponse, const uint32_t flags, - void* userp) -{ - if (iresponse != nullptr) { - LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceResponseDelete(iresponse), - "deleting null response"); - } -} - -void -NullRequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) -{ - if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) { - LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceRequestDelete(request), "deleting null request"); - } -} - -} // namespace - -InferenceRequest::InferenceRequest( - const std::shared_ptr& model, const int64_t requested_model_version) - : InferenceRequest(model.get(), requested_model_version) -{ - model_shared_ = model; -} - -InferenceRequest::InferenceRequest( - Model* model, const int64_t requested_model_version) - : needs_normalization_(true), model_raw_(model), - requested_model_version_(requested_model_version), flags_(0), - correlation_id_(0), batch_size_(0), timeout_us_(0), collect_stats_(true) -{ - SetPriority(0); -} - -const std::string& -InferenceRequest::ModelName() const -{ - return model_raw_->Name(); -} - -int64_t -InferenceRequest::ActualModelVersion() const -{ - return model_raw_->Version(); -} - -void -InferenceRequest::SetPriority(uint32_t p) -{ - if ((p == 0) || (p > model_raw_->MaxPriorityLevel())) { - priority_ = model_raw_->DefaultPriorityLevel(); - } else { - priority_ = p; - } -} - -#ifdef TRITON_ENABLE_TRACING -Status -InferenceRequest::TraceInputTensors( - TRITONSERVER_InferenceTraceActivity activity, const std::string& msg) -{ - const auto& inputs = this->ImmutableInputs(); - TRITONSERVER_MemoryType dst_memory_type = 
TRITONSERVER_MEMORY_CPU; - int64_t dst_memory_type_id = 0; - - for (const auto& pr : inputs) { - InferenceRequest::Input* ti = pr.second; - - // input data - const std::string& name = ti->Name(); - TRITONSERVER_DataType datatype = DataTypeToTriton(ti->DType()); - uint64_t byte_size = ti->Data()->TotalByteSize(); - const int64_t* shape = ti->ShapeWithBatchDim().data(); - uint32_t dim_count = ti->ShapeWithBatchDim().size(); - uint32_t buffer_count = ti->DataBufferCount(); - // chunk buffer - Status status; - const void* buffer; - uint64_t buffer_size; - TRITONSERVER_MemoryType src_memory_type; - int64_t src_memory_type_id; - bool cuda_used; - - if (buffer_count == 0) { - LOG_STATUS_ERROR( - status, LogRequest() + - TRITONSERVER_InferenceTraceActivityString(activity) + - ": " + msg + ": tensor: " + name + ": no buffer chunk"); - continue; - } - - if (buffer_count == 1) { - status = ti->DataBuffer( - 0, &buffer, &buffer_size, &src_memory_type, &src_memory_type_id); - if (!status.IsOk()) { - LOG_STATUS_ERROR( - status, LogRequest() + - TRITONSERVER_InferenceTraceActivityString(activity) + - ": " + msg + ": tensor: " + name + - ": fail to get data buffer: " + status.Message()); - return status; - } - - if (buffer_size != byte_size) { - LOG_STATUS_ERROR( - status, - LogRequest() + TRITONSERVER_InferenceTraceActivityString(activity) + - ": " + msg + ": tensor: " + name + ": truncated buffer"); - continue; - } - - INFER_TRACE_TENSOR_ACTIVITY( - this->trace_, activity, name.c_str(), datatype, - const_cast(buffer), buffer_size, shape, dim_count, - src_memory_type, src_memory_type_id); - - continue; - } - - // input buffer - std::vector in_buffer(byte_size); - char* base = in_buffer.data(); - size_t offset = 0; - for (uint32_t b = 0; b < buffer_count; ++b) { - status = ti->DataBuffer( - b, &buffer, &buffer_size, &src_memory_type, &src_memory_type_id); - if (!status.IsOk()) { - LOG_STATUS_ERROR( - status, LogRequest() + - TRITONSERVER_InferenceTraceActivityString(activity) + - ": " + msg + ": tensor: " + name + - ": fail to get data buffer: " + status.Message()); - return status; - } - - status = CopyBuffer( - "InferenceRequest TraceInputTensors", src_memory_type, - src_memory_type_id, dst_memory_type, dst_memory_type_id, buffer_size, - buffer, base + offset, nullptr, &cuda_used); - if (!status.IsOk()) { - LOG_STATUS_ERROR( - status, LogRequest() + - TRITONSERVER_InferenceTraceActivityString(activity) + - ": " + msg + ": tensor: " + name + - ": fail to copy buffer: " + status.Message()); - return status; - } - - offset += buffer_size; - } - - INFER_TRACE_TENSOR_ACTIVITY( - this->trace_, activity, name.c_str(), datatype, - static_cast(base), byte_size, shape, dim_count, dst_memory_type, - dst_memory_type_id); - } - - return Status::Success; -} -#endif // TRITON_ENABLE_TRACING - -Status -InferenceRequest::OutputBufferProperties( - const char* name, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) -{ - const auto allocator = response_factory_->Allocator(); - if ((allocator == nullptr) || (allocator->QueryFn() == nullptr)) { - return Status( - Status::Code::UNAVAILABLE, - (LogRequest() + "Output properties are not available").c_str()); - } else { - RETURN_IF_TRITONSERVER_ERROR(allocator->QueryFn()( - reinterpret_cast( - const_cast(allocator)), - response_factory_->AllocatorUserp(), name, byte_size, memory_type, - memory_type_id)); - } - return Status::Success; -} - -Status -InferenceRequest::Run(std::unique_ptr& request) -{ - return request->model_raw_->Enqueue(request); 
-} - -void -InferenceRequest::RespondIfError( - std::unique_ptr& request, const Status& status, - const bool release_request) -{ - if (status.IsOk()) { - return; - } - - // Use the response factory to create a response, set the status, - // and send it. If something goes wrong all we can do is log the - // error. Because this is sending an error we assume that this is - // the last response for the request and so set the FINAL flag. - std::unique_ptr response; - LOG_STATUS_ERROR( - request->response_factory_->CreateResponse(&response), - (request->LogRequest() + "failed to create error response").c_str()); - LOG_STATUS_ERROR( - InferenceResponse::SendWithStatus( - std::move(response), TRITONSERVER_RESPONSE_COMPLETE_FINAL, status), - (request->LogRequest() + "failed to send error response").c_str()); - - // If releasing the request then invoke the release callback which - // gives ownership to the callback. So can't access 'request' after - // this point. - if (release_request) { - InferenceRequest::Release( - std::move(request), TRITONSERVER_REQUEST_RELEASE_ALL); - } -} - -void -InferenceRequest::RespondIfError( - std::vector>& requests, - const Status& status, const bool release_requests) -{ - if (status.IsOk()) { - return; - } - - for (auto& request : requests) { - RespondIfError(request, status, release_requests); - } -} - -void -InferenceRequest::Release( - std::unique_ptr&& request, const uint32_t release_flags) -{ - // Invoke the release callbacks added internally before releasing the - // request to user provided callback. - for (auto it = request->release_callbacks_.rbegin(); - it != request->release_callbacks_.rend(); it++) { - (*it)(); - } - request->release_callbacks_.clear(); - -#ifdef TRITON_ENABLE_TRACING - // If tracing then record request end and release the trace. - // This must be before the request callback to ensure the trace - // is properly layered, as the request may be nested in an ensemble - // and the callback may interact with upper level trace. - if (request->trace_ != nullptr) { - request->trace_->ReportNow(TRITONSERVER_TRACE_REQUEST_END); - request->ReleaseTrace(); - } -#endif // TRITON_ENABLE_TRACING - - void* userp = request->release_userp_; - auto& release_fn = request->release_fn_; - release_fn( - reinterpret_cast(request.release()), - release_flags, userp); -} - -InferenceRequest* -InferenceRequest::CopyAsNull(const InferenceRequest& from) -{ - // Create a copy of 'from' request with artifical inputs and no requested - // outputs. Maybe more efficient to share inputs and other metadata, - // but that binds the Null request with 'from' request's lifecycle. - std::unique_ptr lrequest( - new InferenceRequest(from.model_raw_, from.requested_model_version_)); - lrequest->needs_normalization_ = false; - lrequest->batch_size_ = from.batch_size_; - lrequest->collect_stats_ = false; - - // Three passes: first to construct input for the shape tensors inputs, second - // to obtain the max input byte size for allocating a large enough buffer for - // all non shape tensor inputs; third to construct the inputs for these - // tensors. - // First pass - for (const auto& input : from.OriginalInputs()) { - // Handle only shape tensors in this pass - if (!input.second.IsShapeTensor()) { - continue; - } - - // Prepare the memory to hold input data - size_t byte_size = input.second.Data()->TotalByteSize(); - auto mem_type = TRITONSERVER_MEMORY_CPU; - int64_t mem_id = 0; - std::shared_ptr data = - std::make_shared(byte_size, mem_type, mem_id); - - // Get the source buffer. 
Assumes shape tensors be in a single buffer on the - // CPU - const auto& from_data = input.second.Data(); - size_t from_data_byte_size; - TRITONSERVER_MemoryType from_data_memory_type; - int64_t from_data_memory_id; - const char* from_data_buffer = from_data->BufferAt( - 0 /* idx */, &from_data_byte_size, &from_data_memory_type, - &from_data_memory_id); - - if (from_data_byte_size != byte_size) { - LOG_WARNING - << lrequest->LogRequest() - << "The byte size of shape tensor to be copied does not match"; - } - - // Copy the shape values to the input buffer - std::memcpy(data->MutableBuffer(), from_data_buffer, from_data_byte_size); - - Input* new_input; - lrequest->AddOriginalInput( - input.first, input.second.DType(), input.second.Shape(), &new_input); - - // Must normalize shape here... - *new_input->MutableShape() = input.second.Shape(); - *new_input->MutableShapeWithBatchDim() = input.second.ShapeWithBatchDim(); - - new_input->SetData(data); - } - - // Second pass - size_t max_byte_size = 0; - size_t max_str_byte_size = 0; - const std::string* max_input_name; - for (const auto& input : from.OriginalInputs()) { - // Skip shape tensors in this pass - if (input.second.IsShapeTensor()) { - continue; - } - - if (input.second.DType() == inference::DataType::TYPE_STRING) { - int64_t element_count = - triton::common::GetElementCount(input.second.Shape()); - - size_t str_byte_size = static_cast(4 * element_count); - max_str_byte_size = std::max(str_byte_size, max_str_byte_size); - if (str_byte_size > max_byte_size) { - max_byte_size = str_byte_size; - max_input_name = &(input.first); - } - } else { - if (input.second.Data()->TotalByteSize() >= max_byte_size) { - max_byte_size = input.second.Data()->TotalByteSize(); - max_input_name = &(input.first); - } - } - } - - // Third pass - // [DLIS-1268] should use one growable static buffer for all null requests - auto mem_type = TRITONSERVER_MEMORY_CPU; - int64_t mem_id = 0; - std::shared_ptr data = - std::make_shared(max_byte_size, mem_type, mem_id); - auto data_base = data->BufferAt(0, &max_byte_size, &mem_type, &mem_id); - - // Zero initialization is only required when there is a TYPE_BYTES tensor in - // the request. Only set the required number of bytes to zero. - if (max_str_byte_size > 0) { - std::fill( - data->MutableBuffer(), data->MutableBuffer() + max_str_byte_size, 0); - } - - for (const auto& input : from.OriginalInputs()) { - // skip shape tensors in this pass - if (input.second.IsShapeTensor()) { - continue; - } - Input* new_input; - lrequest->AddOriginalInput( - input.first, input.second.DType(), input.second.Shape(), &new_input); - - // Must normalize shape here... - *new_input->MutableShape() = input.second.Shape(); - *new_input->MutableShapeWithBatchDim() = input.second.ShapeWithBatchDim(); - - // Note that the input that have max byte size will be responsible for - // holding the artifical data, while other inputs will hold a reference to - // it with byte size that matches 'from' - if (input.first == *max_input_name) { - new_input->SetData(data); - } else { - if (inference::DataType::TYPE_STRING == input.second.DType()) { - new_input->AppendData( - data_base, - triton::common::GetElementCount(input.second.Shape()) * 4, mem_type, - mem_id); - } else { - new_input->AppendData( - data_base, input.second.Data()->TotalByteSize(), mem_type, mem_id); - } - } - } - - // No outputs were requested and thus there should be no allocations. 
- lrequest->SetResponseCallback( - &null_allocator, nullptr, NullResponseComplete, nullptr); - lrequest->SetReleaseCallback(NullRequestComplete, nullptr); - - // Must normalize inputs here... - for (auto& pr : lrequest->original_inputs_) { - lrequest->inputs_.emplace( - std::make_pair(pr.second.Name(), std::addressof(pr.second))); - } - - return lrequest.release(); -} - -Status -InferenceRequest::MutableOriginalInput( - const std::string& name, InferenceRequest::Input** input) -{ - auto itr = original_inputs_.find(name); - if (itr == original_inputs_.end()) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + name + "' does not exist in request"); - } - - *input = &(itr->second); - - return Status::Success; -} - -Status -InferenceRequest::ImmutableInput( - const std::string& name, const InferenceRequest::Input** input) const -{ - auto itr = inputs_.find(name); - if (itr == inputs_.end()) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + name + "' does not exist in request"); - } - - *input = itr->second; - return Status::Success; -} - -Status -InferenceRequest::AddOriginalInput( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count, - InferenceRequest::Input** input) -{ - const auto& pr = original_inputs_.emplace( - std::piecewise_construct, std::forward_as_tuple(name), - std::forward_as_tuple(name, datatype, shape, dim_count)); - if (!pr.second) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + name + "' already exists in request"); - } - - if (input != nullptr) { - *input = std::addressof(pr.first->second); - } - - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::AddOriginalInput( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, InferenceRequest::Input** input) -{ - return AddOriginalInput(name, datatype, &shape[0], shape.size(), input); -} - -Status -InferenceRequest::AddRawInput( - const std::string& name, InferenceRequest::Input** input) -{ - if (original_inputs_.size() != 0) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "raw input '" + name + - "' can't be added to request with other inputs"); - } - const auto& pr = original_inputs_.emplace( - std::piecewise_construct, std::forward_as_tuple(name), - std::forward_as_tuple()); - if (!pr.second) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + name + "' already exists in request"); - } - - if (input != nullptr) { - *input = std::addressof(pr.first->second); - } - - raw_input_name_ = name; - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::RemoveOriginalInput(const std::string& name) -{ - if (original_inputs_.erase(name) != 1) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + name + "' does not exist in request"); - } - - if (name == raw_input_name_) { - raw_input_name_.clear(); - } - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::RemoveAllOriginalInputs() -{ - original_inputs_.clear(); - raw_input_name_.clear(); - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::AddOverrideInput( - const std::string& name, const inference::DataType datatype, - const int64_t batch_size, const std::vector& shape, - std::shared_ptr* input) -{ - std::shared_ptr i = std::make_shared(name, datatype, shape); - *(i->MutableShape()) = i->OriginalShape(); - if 
(batch_size > 0) { - *(i->MutableShapeWithBatchDim()) = {batch_size}; - i->MutableShapeWithBatchDim()->insert( - i->MutableShapeWithBatchDim()->end(), i->OriginalShape().begin(), - i->OriginalShape().end()); - } else { - *(i->MutableShapeWithBatchDim()) = i->OriginalShape(); - } - - RETURN_IF_ERROR(AddOverrideInput(i)); - if (input != nullptr) { - *input = std::move(i); - } - - return Status::Success; -} - -Status -InferenceRequest::AddOverrideInput( - const std::shared_ptr& input) -{ - LOG_VERBOSE(1) << LogRequest() << "adding input override for " - << input->Name() << ": " << *this; - - const auto& pr = - override_inputs_.emplace(std::make_pair(input->Name(), input)); - if (!pr.second) { - pr.first->second = input; - } - - // Add or replace this override in the inputs... - const auto res = inputs_.emplace(std::make_pair(input->Name(), input.get())); - if (!res.second) { - res.first->second = input.get(); - } - - LOG_VERBOSE(1) << LogRequest() << "added input override for " << input->Name() - << ": " << *this; - - return Status::Success; -} - -Status -InferenceRequest::AddOriginalRequestedOutput(const std::string& name) -{ - original_requested_outputs_.insert(name); - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::LoadInputStates() -{ - // Add the input states to the inference request. - if (sequence_states_ != nullptr) { - if (sequence_states_->IsNullRequest()) { - sequence_states_ = - SequenceStates::CopyAsNull(sequence_states_->NullSequenceStates()); - } - for (auto& input_state_pair : sequence_states_->InputStates()) { - auto& input_state = input_state_pair.second; - std::shared_ptr input = - std::make_shared( - input_state->Name(), input_state->DType(), input_state->Shape()); - *input->MutableShapeWithBatchDim() = input_state->Shape(); - input->SetData(input_state->Data()); - AddOverrideInput(input); - } - } - - return Status::Success; -} - -Status -InferenceRequest::RemoveOriginalRequestedOutput(const std::string& name) -{ - original_requested_outputs_.erase(name); - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::RemoveAllOriginalRequestedOutputs() -{ - original_requested_outputs_.clear(); - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::PrepareForInference() -{ - // Remove override inputs as those are added during any previous - // inference execution. - inputs_.clear(); - override_inputs_.clear(); - - // Renormalize if anything has changed in the inference request in a - // way that could impact renormalization. - if (needs_normalization_) { - RETURN_IF_ERROR(Normalize()); - needs_normalization_ = false; - } - - // Initially show the actual inputs to be only the original - // inputs. If overrides are added later they will be added to - // 'inputs_'. 
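PrepareForInference() above re-runs Normalize() only when a prior mutation has set needs_normalization_. A minimal standalone sketch of that dirty-flag pattern, using a hypothetical Request type (illustrative only):

```cpp
// Sketch: mutations mark the object dirty; the potentially expensive
// normalization runs at most once per prepare. Illustrative only.
#include <iostream>
#include <string>
#include <vector>

class Request {
 public:
  void AddInput(std::string name)
  {
    inputs_.push_back(std::move(name));
    needs_normalization_ = true;  // any mutation invalidates prior work
  }

  void PrepareForInference()
  {
    if (needs_normalization_) {
      Normalize();
      needs_normalization_ = false;  // skip the work until the next change
    }
  }

 private:
  void Normalize()
  {
    std::cout << "normalizing " << inputs_.size() << " inputs\n";
  }

  std::vector<std::string> inputs_;
  bool needs_normalization_ = false;
};

int main()
{
  Request r;
  r.AddInput("INPUT0");
  r.PrepareForInference();  // normalizes once
  r.PrepareForInference();  // no-op, nothing changed since
  return 0;
}
```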
- for (auto& pr : original_inputs_) { - inputs_.emplace( - std::make_pair(pr.second.Name(), std::addressof(pr.second))); - } - - // Clear the timestamps - queue_start_ns_ = 0; - batcher_start_ns_ = 0; -#ifdef TRITON_ENABLE_STATS - request_start_ns_ = 0; -#endif // TRITON_ENABLE_STATS - - LOG_VERBOSE(1) << LogRequest() << "prepared: " << *this; - - return Status::Success; -} - -Status -InferenceRequest::Normalize() -{ - const inference::ModelConfig& model_config = model_raw_->Config(); - - // Fill metadata for raw input - if (!raw_input_name_.empty()) { - const bool has_multiple_inputs = - (original_inputs_.size() != 1) || (model_config.input_size() != 1); - if (has_multiple_inputs) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "Raw request must only have 1 input (found " + - std::to_string(original_inputs_.size()) + - ") to be deduced but got " + - std::to_string(model_config.input_size()) + " inputs in '" + - ModelName() + "' model configuration"); - } - auto it = original_inputs_.begin(); - if (raw_input_name_ != it->first) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "Unexpected reference name for raw input '" + - raw_input_name_ + "' got '" + it->first + "'"); - } - const auto& config_input = model_config.input(0); - auto& raw_input = it->second; - std::vector shape; - if (model_config.max_batch_size() != 0) { - shape.emplace_back(1); - } - int64_t dynamic_axis = -1; - size_t element_cnt = 1; - for (const auto& dim : config_input.dims()) { - if (dim == triton::common::WILDCARD_DIM) { - if (dynamic_axis != -1) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "The shape of the raw input '" + - config_input.name() + - "' can not be deduced because there are more than one " - "variable-sized dimension"); - } - dynamic_axis = shape.size(); - } else { - element_cnt *= (size_t)dim; - } - shape.emplace_back(dim); - } - if ((config_input.data_type() == inference::DataType::TYPE_STRING)) { - const bool has_one_element = (dynamic_axis == -1) && (element_cnt == 1); - if (!has_one_element) { - return Status( - Status::Code::INVALID_ARG, LogRequest() + - "For BYTE datatype raw input, the " - "model must have input shape [1]"); - } - // In the case of BYTE data type, we will prepend the byte size to follow - // the Triton convention. - raw_input_size_ = raw_input.Data()->TotalByteSize(); - RETURN_IF_ERROR(raw_input.PrependData( - &raw_input_size_, sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0)); - // Limit the BYTE raw input not to have host policy specific input for - // simplicity, such case won't happen given the current protocol spec. - // Will need to extend Input::PrependData() if needed. - if (!raw_input.HostPolicyData().empty()) { - return Status( - Status::Code::INVALID_ARG, LogRequest() + - "Raw input with data associated " - "with a host policy setting is not " - "currently supported"); - } - } else if (dynamic_axis != -1) { - shape[dynamic_axis] = - raw_input.Data()->TotalByteSize() / element_cnt / - triton::common::GetDataTypeByteSize(config_input.data_type()); - } - raw_input.SetMetadata(config_input.name(), config_input.data_type(), shape); - } - - // Initialize the requested outputs to be used during inference. If - // original_requested_outputs_ is empty assume all outputs specified - // in model config are being requested. 
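For a non-BYTES raw input, the normalization above recovers the single variable-sized dimension from the payload's total byte size. A standalone sketch of that arithmetic with illustrative dims and sizes (batch dimension omitted for brevity):

```cpp
// Sketch: with at most one -1 dimension, that dimension follows from
// total_byte_size / fixed_element_count / dtype_byte_size.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
  // Configured dims for the single input; -1 marks the variable axis.
  std::vector<int64_t> dims{-1, 4};
  const size_t dtype_byte_size = 4;                          // e.g. FP32
  const size_t total_byte_size = 32 * 4 * dtype_byte_size;   // raw payload

  int64_t dynamic_axis = -1;
  size_t element_cnt = 1;
  std::vector<int64_t> shape;
  for (size_t i = 0; i < dims.size(); ++i) {
    if (dims[i] == -1) {
      dynamic_axis = static_cast<int64_t>(i);
    } else {
      element_cnt *= static_cast<size_t>(dims[i]);
    }
    shape.push_back(dims[i]);
  }

  if (dynamic_axis != -1) {
    shape[dynamic_axis] =
        static_cast<int64_t>(total_byte_size / element_cnt / dtype_byte_size);
  }

  for (int64_t d : shape) {
    std::cout << d << " ";  // prints: 32 4
  }
  std::cout << "\n";
  return 0;
}
```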
- requested_outputs_.clear(); - if (original_requested_outputs_.size() == 0) { - for (const auto& output : model_config.output()) { - requested_outputs_.insert(output.name()); - } - } else { - // Validate if the original requested output name exists in the - // model configuration. - for (const auto& output_name : original_requested_outputs_) { - const inference::ModelOutput* output_config; - RETURN_IF_ERROR(model_raw_->GetOutput(output_name, &output_config)); - } - } - // Make sure that the request is providing the number of inputs - // as is expected by the model. - if ((original_inputs_.size() > (size_t)model_config.input_size()) || - (original_inputs_.size() < model_raw_->RequiredInputCount())) { - // If no input is marked as optional, then use exact match error message - // for consistency / backward compatibility - if ((size_t)model_config.input_size() == model_raw_->RequiredInputCount()) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "expected " + - std::to_string(model_config.input_size()) + " inputs but got " + - std::to_string(original_inputs_.size()) + " inputs for model '" + - ModelName() + "'"); - } else { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "expected number of inputs between " + - std::to_string(model_raw_->RequiredInputCount()) + " and " + - std::to_string(model_config.input_size()) + " but got " + - std::to_string(original_inputs_.size()) + " inputs for model '" + - ModelName() + "'"); - } - } - - // Determine the batch size and shape of each input. - if (model_config.max_batch_size() == 0) { - // Model does not support Triton-style batching so set as - // batch-size 0 and leave the tensor shapes as they are. - batch_size_ = 0; - for (auto& pr : original_inputs_) { - auto& input = pr.second; - *input.MutableShape() = input.OriginalShape(); - } - } else { - // Model does support Triton-style batching so each input tensor - // must have the same first dimension which is the batch - // size. Adjust the shape of the input tensors to remove the batch - // dimension. - batch_size_ = 0; - for (auto& pr : original_inputs_) { - auto& input = pr.second; - - // For a shape tensor, keep the tensor's shape as it is and mark - // that the input is a shape tensor. - const inference::ModelInput* input_config; - RETURN_IF_ERROR(model_raw_->GetInput(input.Name(), &input_config)); - if (input_config->is_shape_tensor()) { - *input.MutableShape() = input.OriginalShape(); - input.SetIsShapeTensor(true); - continue; - } - - if (input.OriginalShape().size() == 0) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + input.Name() + - "' has no shape but model requires batch dimension for '" + - ModelName() + "'"); - } - - if (batch_size_ == 0) { - batch_size_ = input.OriginalShape()[0]; - } else if (input.OriginalShape()[0] != batch_size_) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "input '" + input.Name() + - "' batch size does not match other inputs for '" + ModelName() + - "'"); - } - - input.MutableShape()->assign( - input.OriginalShape().begin() + 1, input.OriginalShape().end()); - } - } - - // Make sure request batch-size doesn't exceed what is supported by - // the model. 
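When the model supports Triton-style batching, the loop above requires every input to share the same leading dimension and then strips it from the per-input shape. A standalone sketch of that check with hypothetical input names:

```cpp
// Sketch: the first input fixes the batch size, all others must agree,
// and the batch dimension is removed from each stored shape.
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
  std::map<std::string, std::vector<int64_t>> original_shapes{
      {"INPUT0", {8, 16}}, {"INPUT1", {8, 3, 224, 224}}};

  int64_t batch_size = 0;
  std::map<std::string, std::vector<int64_t>> shapes;
  for (const auto& p : original_shapes) {
    const auto& orig = p.second;
    if (orig.empty()) {
      std::cerr << p.first << ": missing batch dimension\n";
      return 1;
    }
    if (batch_size == 0) {
      batch_size = orig[0];               // first input sets the batch size
    } else if (orig[0] != batch_size) {
      std::cerr << p.first << ": batch size mismatch\n";
      return 1;
    }
    shapes[p.first].assign(orig.begin() + 1, orig.end());  // strip batch dim
  }

  std::cout << "batch size " << batch_size << "\n";
  return 0;
}
```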
- if ((int)batch_size_ > model_config.max_batch_size()) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "inference request batch-size must be <= " + - std::to_string(model_config.max_batch_size()) + " for '" + - ModelName() + "'"); - } - - // Verify that each input shape is valid for the model, make - // adjustments for reshapes and find the total tensor size. - for (auto& pr : original_inputs_) { - const inference::ModelInput* input_config; - RETURN_IF_ERROR(model_raw_->GetInput(pr.second.Name(), &input_config)); - - auto& input = pr.second; - auto shape = input.MutableShape(); - - if (input.DType() != input_config->data_type()) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "inference input data-type is '" + - std::string( - triton::common::DataTypeToProtocolString(input.DType())) + - "', model expects '" + - std::string(triton::common::DataTypeToProtocolString( - input_config->data_type())) + - "' for '" + ModelName() + "'"); - } - - // Validate input shape - { - bool match_config = true; - const auto& config_dims = input_config->dims(); - const auto& input_dims = *shape; - if (config_dims.size() != (int64_t)input_dims.size()) { - match_config = false; - } else { - for (int i = 0; i < config_dims.size(); ++i) { - if (input_dims[i] == triton::common::WILDCARD_DIM) { - return Status( - Status::Code::INVALID_ARG, - LogRequest() + - "All input dimensions should be specified for input '" + - pr.first + "' for model '" + ModelName() + "', got " + - triton::common::DimsListToString(input.OriginalShape())); - } else if ( - (config_dims[i] != triton::common::WILDCARD_DIM) && - (config_dims[i] != input_dims[i])) { - match_config = false; - break; - } - } - } - - if (!match_config) { - triton::common::DimsList full_dims; - if (model_config.max_batch_size() > 0) { - full_dims.Add(triton::common::WILDCARD_DIM); - } - for (int i = 0; i < input_config->dims_size(); ++i) { - full_dims.Add(input_config->dims(i)); - } - return Status( - Status::Code::INVALID_ARG, - LogRequest() + "unexpected shape for input '" + pr.first + - "' for model '" + ModelName() + "'. Expected " + - triton::common::DimsListToString(full_dims) + ", got " + - triton::common::DimsListToString(input.OriginalShape())); - } - } - - // If there is a reshape for this input then adjust them to - // match the reshape. As reshape may have variable-size - // dimensions, we need to record corresponding value so that we - // can set the value correctly for reshape. - if (input_config->has_reshape()) { - std::deque variable_size_values; - for (int64_t idx = 0; idx < input_config->dims_size(); idx++) { - if (input_config->dims(idx) == -1) { - variable_size_values.push_back((*shape)[idx]); - } - } - - shape->clear(); - for (const auto& dim : input_config->reshape().shape()) { - if (dim == -1) { - shape->push_back(variable_size_values.front()); - variable_size_values.pop_front(); - } else { - shape->push_back(dim); - } - } - } - - // Create shape with batch dimension. - // FIXME, should not need this!! 
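The reshape handling above records the request-provided value of each variable configured dimension and re-inserts those values, in order, wherever the reshape also uses -1. A standalone sketch of that substitution with illustrative dims:

```cpp
// Sketch: values standing in for -1 in the configured dims are queued
// and consumed by the -1 slots of the reshape.
#include <cstdint>
#include <deque>
#include <iostream>
#include <vector>

int main()
{
  std::vector<int64_t> config_dims{-1, 4};      // model config "dims"
  std::vector<int64_t> reshape_dims{-1, 2, 2};  // model config "reshape"
  std::vector<int64_t> shape{10, 4};            // request shape after matching

  // Remember the concrete value behind each -1 in config_dims.
  std::deque<int64_t> variable_size_values;
  for (size_t i = 0; i < config_dims.size(); ++i) {
    if (config_dims[i] == -1) {
      variable_size_values.push_back(shape[i]);
    }
  }

  // Rebuild the shape following the reshape, reusing the recorded values.
  shape.clear();
  for (int64_t dim : reshape_dims) {
    if (dim == -1) {
      shape.push_back(variable_size_values.front());
      variable_size_values.pop_front();
    } else {
      shape.push_back(dim);
    }
  }

  for (int64_t d : shape) {
    std::cout << d << " ";  // prints: 10 2 2
  }
  std::cout << "\n";
  return 0;
}
```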
- if (batch_size_ == 0) { - *input.MutableShapeWithBatchDim() = *shape; - } else { - input.MutableShapeWithBatchDim()->clear(); - input.MutableShapeWithBatchDim()->push_back(batch_size_); - for (int64_t d : *shape) { - input.MutableShapeWithBatchDim()->push_back(d); - } - } - } - - return Status::Success; -} - -#ifdef TRITON_ENABLE_STATS -void -InferenceRequest::ReportStatistics( - MetricModelReporter* metric_reporter, bool success, - const uint64_t compute_start_ns, const uint64_t compute_input_end_ns, - const uint64_t compute_output_start_ns, const uint64_t compute_end_ns) -{ - if (!collect_stats_) { - return; - } - -#ifdef TRITON_ENABLE_TRACING - if (trace_ != nullptr) { - trace_->Report(TRITONSERVER_TRACE_COMPUTE_START, compute_start_ns); - trace_->Report(TRITONSERVER_TRACE_COMPUTE_INPUT_END, compute_input_end_ns); - trace_->Report( - TRITONSERVER_TRACE_COMPUTE_OUTPUT_START, compute_output_start_ns); - trace_->Report(TRITONSERVER_TRACE_COMPUTE_END, compute_end_ns); - } -#endif // TRITON_ENABLE_TRACING - - INFER_STATS_DECL_TIMESTAMP(request_end_ns); - - if (success) { - model_raw_->MutableStatsAggregator()->UpdateSuccess( - metric_reporter, std::max(1U, batch_size_), request_start_ns_, - queue_start_ns_, compute_start_ns, compute_input_end_ns, - compute_output_start_ns, compute_end_ns, request_end_ns); - if (secondary_stats_aggregator_ != nullptr) { - secondary_stats_aggregator_->UpdateSuccess( - nullptr /* metric_reporter */, std::max(1U, batch_size_), - request_start_ns_, queue_start_ns_, compute_start_ns, - compute_input_end_ns, compute_output_start_ns, compute_end_ns, - request_end_ns); - } - } else { - model_raw_->MutableStatsAggregator()->UpdateFailure( - metric_reporter, request_start_ns_, request_end_ns); - if (secondary_stats_aggregator_ != nullptr) { - secondary_stats_aggregator_->UpdateFailure( - nullptr /* metric_reporter */, request_start_ns_, request_end_ns); - } - } -} - -void -InferenceRequest::ReportStatisticsWithDuration( - MetricModelReporter* metric_reporter, bool success, - const uint64_t compute_start_ns, const uint64_t compute_input_duration_ns, - const uint64_t compute_infer_duration_ns, - const uint64_t compute_output_duration_ns) -{ - if (!collect_stats_) { - return; - } - - INFER_STATS_DECL_TIMESTAMP(request_end_ns); - - if (success) { - model_raw_->MutableStatsAggregator()->UpdateSuccessWithDuration( - metric_reporter, std::max(1U, batch_size_), request_start_ns_, - queue_start_ns_, compute_start_ns, request_end_ns, - compute_input_duration_ns, compute_infer_duration_ns, - compute_output_duration_ns); - if (secondary_stats_aggregator_ != nullptr) { - secondary_stats_aggregator_->UpdateSuccessWithDuration( - nullptr /* metric_reporter */, std::max(1U, batch_size_), - request_start_ns_, queue_start_ns_, compute_start_ns, request_end_ns, - compute_input_duration_ns, compute_infer_duration_ns, - compute_output_duration_ns); - } - } else { - model_raw_->MutableStatsAggregator()->UpdateFailure( - metric_reporter, request_start_ns_, request_end_ns); - if (secondary_stats_aggregator_ != nullptr) { - secondary_stats_aggregator_->UpdateFailure( - nullptr /* metric_reporter */, request_start_ns_, request_end_ns); - } - } -} - -void -InferenceRequest::ReportStatisticsCacheHit(MetricModelReporter* metric_reporter) -{ - // Capture end of request time - INFER_STATS_DECL_TIMESTAMP(request_end_ns); - - if (cache_lookup_start_ns_ >= cache_lookup_end_ns_) { - LOG_WARNING << LogRequest() - << "Cache lookup timestamps were not set correctly. 
Cache " - "lookup duration stats may be incorrect."; - } - const uint64_t cache_lookup_duration_ns = - cache_lookup_end_ns_ - cache_lookup_start_ns_; - - // Cache hit is always success - model_raw_->MutableStatsAggregator()->UpdateSuccessCacheHit( - metric_reporter, std::max(1U, batch_size_), request_start_ns_, - queue_start_ns_, cache_lookup_start_ns_, request_end_ns, - cache_lookup_duration_ns); - if (secondary_stats_aggregator_ != nullptr) { - secondary_stats_aggregator_->UpdateSuccessCacheHit( - nullptr /* metric_reporter */, std::max(1U, batch_size_), - request_start_ns_, queue_start_ns_, cache_lookup_start_ns_, - request_end_ns, cache_lookup_duration_ns); - } -} - -void -InferenceRequest::ReportStatisticsCacheMiss( - MetricModelReporter* metric_reporter) -{ - if (cache_lookup_start_ns_ >= cache_lookup_end_ns_) { - LOG_WARNING << LogRequest() - << "Cache lookup timestamps were not set correctly. Cache " - "lookup duration stats may be incorrect."; - } - if (cache_insertion_start_ns_ >= cache_insertion_end_ns_) { - LOG_WARNING << LogRequest() - << "Cache insertion timestamps were not set correctly. Cache " - "insertion duration stats may be incorrect."; - } - - const uint64_t cache_lookup_duration_ns = - cache_lookup_end_ns_ - cache_lookup_start_ns_; - - const uint64_t cache_insertion_duration_ns = - cache_insertion_end_ns_ - cache_insertion_start_ns_; - - model_raw_->MutableStatsAggregator()->UpdateSuccessCacheMiss( - metric_reporter, cache_lookup_duration_ns, cache_insertion_duration_ns); - if (secondary_stats_aggregator_ != nullptr) { - secondary_stats_aggregator_->UpdateSuccessCacheMiss( - nullptr /* metric_reporter */, cache_lookup_duration_ns, - cache_insertion_duration_ns); - } -} -#endif // TRITON_ENABLE_STATS - -// -// Input -// -InferenceRequest::Input::Input() - : is_shape_tensor_(false), data_(new MemoryReference), - has_host_policy_specific_data_(false) -{ -} - -InferenceRequest::Input::Input( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count) - : name_(name), datatype_(datatype), - original_shape_(shape, shape + dim_count), is_shape_tensor_(false), - data_(new MemoryReference), has_host_policy_specific_data_(false) -{ -} - -InferenceRequest::Input::Input( - const std::string& name, const inference::DataType datatype, - const std::vector& shape) - : name_(name), datatype_(datatype), original_shape_(shape), - is_shape_tensor_(false), data_(new MemoryReference), - has_host_policy_specific_data_(false) -{ -} - -void -InferenceRequest::Input::SetMetadata( - const std::string& name, const inference::DataType& dt, - const std::vector& shape) -{ - name_ = name; - datatype_ = dt; - original_shape_ = shape; -} - -Status -InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor) -{ - is_shape_tensor_ = is_shape_tensor; - return Status::Success; -} - -const std::shared_ptr& -InferenceRequest::Input::Data(const std::string& host_policy_name) const -{ - auto device_data = host_policy_data_map_.find(host_policy_name); - if (device_data == host_policy_data_map_.end()) { - // Fall back on default data if there is no data that has been added for - // this host policy - return data_; - } - return device_data->second; -} - -Status -InferenceRequest::Input::AppendData( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - if (byte_size > 0) { - std::static_pointer_cast(data_)->AddBuffer( - static_cast(base), byte_size, memory_type, memory_type_id); - } - - return 
Status::Success; -} - -Status -InferenceRequest::Input::AppendDataWithBufferAttributes( - const void* base, BufferAttributes* buffer_attributes) -{ - if (buffer_attributes->ByteSize() > 0) { - std::static_pointer_cast(data_)->AddBuffer( - static_cast(base), buffer_attributes); - } - return Status::Success; -} - -Status -InferenceRequest::Input::AppendDataWithHostPolicy( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, const char* host_policy_name) -{ - auto device_data = host_policy_data_map_.find(host_policy_name); - has_host_policy_specific_data_ = true; - if (device_data == host_policy_data_map_.end()) { - auto insert_pair = host_policy_data_map_.insert( - std::make_pair(std::string(host_policy_name), new MemoryReference)); - device_data = insert_pair.first; - } - if (byte_size > 0) { - std::static_pointer_cast(device_data->second) - ->AddBuffer( - static_cast(base), byte_size, memory_type, - memory_type_id); - } - - return Status::Success; -} - -Status -InferenceRequest::Input::PrependData( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - if (byte_size > 0) { - std::static_pointer_cast(data_)->AddBufferFront( - static_cast(base), byte_size, memory_type, memory_type_id); - } - - return Status::Success; -} - -Status -InferenceRequest::Input::SetData(const std::shared_ptr& data) -{ - if (data_->TotalByteSize() != 0) { - return Status( - Status::Code::INVALID_ARG, - "input '" + name_ + "' already has data, can't overwrite"); - } - - data_ = data; - - return Status::Success; -} - -Status -InferenceRequest::Input::SetData( - const std::string& host_policy_name, const std::shared_ptr& data) -{ - if (host_policy_data_map_.find(host_policy_name) != - host_policy_data_map_.end()) { - return Status( - Status::Code::INVALID_ARG, "input '" + name_ + - "' already has data for host policy '" + - host_policy_name + "', can't overwrite"); - } - - host_policy_data_map_.emplace(host_policy_name, data); - - return Status::Success; -} - -Status -InferenceRequest::Input::RemoveAllData() -{ - data_ = std::make_shared(); - host_policy_data_map_.clear(); - has_host_policy_specific_data_ = false; - return Status::Success; -} - -Status -InferenceRequest::Input::DataBuffer( - const size_t idx, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) const -{ - *base = data_->BufferAt(idx, byte_size, memory_type, memory_type_id); - - return Status::Success; -} - -Status -InferenceRequest::Input::DataBufferAttributes( - const size_t idx, const void** base, - BufferAttributes** buffer_attributes) const -{ - *base = data_->BufferAt(idx, buffer_attributes); - - return Status::Success; -} - -Status -InferenceRequest::Input::DataBufferForHostPolicy( - const size_t idx, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, - const std::string& host_policy_name) const -{ - auto device_data = host_policy_data_map_.find(host_policy_name); - if (device_data == host_policy_data_map_.end()) { - // Return data buffer if there is no host-policy specific buffer available - *base = data_->BufferAt(idx, byte_size, memory_type, memory_type_id); - } else { - *base = device_data->second->BufferAt( - idx, byte_size, memory_type, memory_type_id); - } - - return Status::Success; -} - -size_t -InferenceRequest::Input::DataBufferCountForHostPolicy( - const std::string& host_policy_name) const -{ - auto policy_data = 
host_policy_data_map_.find(host_policy_name); - if (policy_data != host_policy_data_map_.end()) { - return policy_data->second->BufferCount(); - } - return data_->BufferCount(); -} - -InferenceRequest::SequenceId::SequenceId() - : sequence_label_(""), sequence_index_(0), - id_type_(InferenceRequest::SequenceId::DataType::UINT64) -{ -} - -InferenceRequest::SequenceId::SequenceId(const std::string& sequence_label) - : sequence_label_(sequence_label), sequence_index_(0), - id_type_(InferenceRequest::SequenceId::DataType::STRING) -{ -} - -InferenceRequest::SequenceId::SequenceId(uint64_t sequence_index) - : sequence_label_(""), sequence_index_(sequence_index), - id_type_(InferenceRequest::SequenceId::DataType::UINT64) -{ -} - -InferenceRequest::SequenceId& -InferenceRequest::SequenceId::operator=(const std::string& rhs) -{ - sequence_label_ = rhs; - sequence_index_ = 0; - id_type_ = InferenceRequest::SequenceId::DataType::STRING; - return *this; -} - -InferenceRequest::SequenceId& -InferenceRequest::SequenceId::operator=(const uint64_t rhs) -{ - sequence_label_ = ""; - sequence_index_ = rhs; - id_type_ = InferenceRequest::SequenceId::DataType::UINT64; - return *this; -} - -std::ostream& -operator<<(std::ostream& out, const InferenceRequest& request) -{ - out << "[0x" << std::addressof(request) << "] " - << "request id: " << request.Id() << ", model: " << request.ModelName() - << ", requested version: " << request.RequestedModelVersion() - << ", actual version: " << request.ActualModelVersion() << ", flags: 0x" - << std::hex << request.Flags() << std::dec - << ", correlation id: " << request.CorrelationId() - << ", batch size: " << request.BatchSize() - << ", priority: " << request.Priority() - << ", timeout (us): " << request.TimeoutMicroseconds() << std::endl; - - out << "original inputs:" << std::endl; - for (const auto& itr : request.OriginalInputs()) { - out << "[0x" << std::addressof(itr.second) << "] " << itr.second - << std::endl; - } - - out << "override inputs:" << std::endl; - for (const auto& itr : request.OverrideInputs()) { - out << "[0x" << itr.second.get() << "] " << *itr.second << std::endl; - } - - out << "inputs:" << std::endl; - for (const auto& itr : request.ImmutableInputs()) { - out << "[0x" << itr.second << "] " << *itr.second << std::endl; - } - - out << "original requested outputs:" << std::endl; - for (const auto& name : request.OriginalRequestedOutputs()) { - out << name << std::endl; - } - - out << "requested outputs:" << std::endl; - for (const auto& name : request.ImmutableRequestedOutputs()) { - out << name << std::endl; - } - - return out; -} - -std::ostream& -operator<<(std::ostream& out, const InferenceRequest::Input& input) -{ - out << "input: " << input.Name() - << ", type: " << triton::common::DataTypeToProtocolString(input.DType()) - << ", original shape: " - << triton::common::DimsListToString(input.OriginalShape()) - << ", batch + shape: " - << triton::common::DimsListToString(input.ShapeWithBatchDim()) - << ", shape: " << triton::common::DimsListToString(input.Shape()); - if (input.IsShapeTensor()) { - out << ", is_shape_tensor: True"; - } - return out; -} - -std::ostream& -operator<<(std::ostream& out, const InferenceRequest::SequenceId& sequence_id) -{ - switch (sequence_id.Type()) { - case InferenceRequest::SequenceId::DataType::STRING: - out << sequence_id.StringValue(); - break; - case InferenceRequest::SequenceId::DataType::UINT64: - out << sequence_id.UnsignedIntValue(); - break; - default: - out << sequence_id.UnsignedIntValue(); - break; - } - 
return out; -} - -bool -operator==( - const InferenceRequest::SequenceId lhs, - const InferenceRequest::SequenceId rhs) -{ - if (lhs.Type() == rhs.Type()) { - switch (lhs.Type()) { - case InferenceRequest::SequenceId::DataType::STRING: - return lhs.StringValue() == rhs.StringValue(); - case InferenceRequest::SequenceId::DataType::UINT64: - return lhs.UnsignedIntValue() == rhs.UnsignedIntValue(); - default: - return lhs.UnsignedIntValue() == rhs.UnsignedIntValue(); - } - } else { - return false; - } -} - -bool -operator!=( - const InferenceRequest::SequenceId lhs, - const InferenceRequest::SequenceId rhs) -{ - return !(lhs == rhs); -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_request.h b/3rdparty/core-r22.12/src/infer_request.h deleted file mode 100644 index 0dba6aa45ab255bf39c5fbbfc5a0d0d5ac3ea659..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_request.h +++ /dev/null @@ -1,800 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include "buffer_attributes.h" -#include "infer_response.h" -#include "infer_stats.h" -#include "infer_trace.h" -#include "memory.h" -#include "response_allocator.h" -#include "sequence_state.h" -#include "status.h" -#include "triton/common/model_config.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -class Model; -class InferenceServer; -class MetricModelReporter; - -// -// An inference request. A request can be used multiple times for -// inference but before each inference run, PrepareForInference() must -// be called to verify and prepare the request. Verification involves -// ensuring that any changes made since the last inference are -// valid. Preparing involves removing/resetting any state left over -// from the previous inference. 
-// -class InferenceRequest { - public: - // Input tensor - class Input { - public: - Input(); - Input( - const std::string& name, const inference::DataType datatype, - const std::vector& shape); - Input( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count); - - // Set the name, data type and original shape of the input tensor. - void SetMetadata( - const std::string& name, const inference::DataType& dt, - const std::vector& shape); - - // The name of the input tensor. There is no mutable operator for - // the name because it is used in a InferenceRequest map and a - // mutable method would allow it to get out-of-sync. - const std::string& Name() const { return name_; } - - // Data type of the input tensor. - inference::DataType DType() const { return datatype_; } - - // The original shape of the input tensor. - const std::vector& OriginalShape() const - { - return original_shape_; - } - - // The shape of the input tensor after normalization. This shape - // is the original shape modified as required/expected by - // inference processing. - const std::vector& Shape() const { return shape_; } - std::vector* MutableShape() { return &shape_; } - - // FIXME. Should not need these functions. All shapes kept here - // should include the batch dimension instead of breaking the same - // into batch + shape. - const std::vector& ShapeWithBatchDim() const - { - return shape_with_batch_dim_; - } - std::vector* MutableShapeWithBatchDim() - { - return &shape_with_batch_dim_; - } - - // Return true if host-specific data was added for this input - bool HasHostPolicySpecificData() const - { - return has_host_policy_specific_data_; - } - - // Whether or not the input is a tensorrt shape tensor - bool IsShapeTensor() const { return is_shape_tensor_; } - - // Set the input to be treated as a shape tensor. - Status SetIsShapeTensor(const bool is_shape_tensor); - - // The data for this input. - const std::shared_ptr& Data() const { return data_; } - - // The data for this input for a specific device - const std::shared_ptr& Data( - const std::string& host_policy_name) const; - - // Return all host policy data set for this input - const std::map>& HostPolicyData() const - { - return host_policy_data_map_; - } - - // Set the data for this input. Error if input already has some - // data. - Status SetData(const std::shared_ptr& data); - - // Set the data associated with the host policy for this input. - // Return error if input already has some data. - Status SetData( - const std::string& host_policy_name, - const std::shared_ptr& data); - - // Append a new buffer of data to this input. - Status AppendData( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - - Status AppendDataWithHostPolicy( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, const char* host_policy_name); - - Status AppendDataWithBufferAttributes( - const void* base, BufferAttributes* buffer_attributes); - - // Prepend a new buffer of data to this input. - Status PrependData( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - - // Remove all existing data for the input. - Status RemoveAllData(); - - // Get the number of buffers containing the input tensor data. - size_t DataBufferCount() const { return data_->BufferCount(); } - - // Get the number of buffers containing the input tensor data with - // host policy. 
If there are no buffers corresponding to the specific - // host policy, the number of buffers in the fallback input data is - // returned. - size_t DataBufferCountForHostPolicy( - const std::string& host_policy_name) const; - - // Get the 'idx' buffer containing a contiguous chunk of bytes for - // the input. Return error is 'idx' refers to a buffer that does - // not exist. Return a pointer to the chunk in 'base' and the - // size of the chunk in 'byte_size'. 'memory_type' acts as - // both input and output. On input 'memory_type' is the buffer - // memory type preferred by the function caller. On return - // 'memory_type' gives the actual memory type of the chunk pointed - // to by 'base'. 'memory_type_id' acts as both input and - // output. On input 'memory_type_id' is the buffer memory type id - // preferred by the function caller. On return 'memory_type_id' - // gives the actual memory type id of the chunk pointed to by - // 'base'. - Status DataBuffer( - const size_t idx, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) const; - - // Get the buffer attributes associated with 'idx' buffer. - Status DataBufferAttributes( - const size_t idx, const void** base, - BufferAttributes** buffer_attributes) const; - - // Get the 'idx' buffer containing a contiguous chunk of bytes for - // the input. Return error is 'idx' refers to a buffer that does - // not exist. Return a pointer to the chunk in 'base' and the - // size of the chunk in 'byte_size'. 'memory_type' acts as - // both input and output. On input 'memory_type' is the buffer - // memory type preferred by the function caller. On return - // 'memory_type' gives the actual memory type of the chunk pointed - // to by 'base'. 'memory_type_id' acts as both input and - // output. On input 'memory_type_id' is the buffer memory type id - // preferred by the function caller. On return 'memory_type_id' - // gives the actual memory type id of the chunk pointed to by - // 'base'. - Status DataBufferForHostPolicy( - const size_t idx, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, - const std::string& host_policy_name) const; - - private: - DISALLOW_COPY_AND_ASSIGN(Input); - friend std::ostream& operator<<( - std::ostream& out, const InferenceRequest::Input& input); - - std::string name_; - inference::DataType datatype_; - std::vector original_shape_; - std::vector shape_; - std::vector shape_with_batch_dim_; - bool is_shape_tensor_; - std::shared_ptr data_; - - bool has_host_policy_specific_data_; - // A map of host policy to input data memory - std::map> host_policy_data_map_; - }; - - // Sequence ID can be either a 64 bit integer or a string. 
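The host-policy accessors documented above fall back to the default input data whenever no policy-specific buffer was added. A minimal standalone sketch of that lookup, using hypothetical names and std containers only:

```cpp
// Sketch: per-policy buffers live in a map keyed by policy name; a
// policy without its own entry falls back to the default buffer.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

using Buffer = std::vector<char>;

const std::shared_ptr<Buffer>& DataFor(
    const std::string& policy,
    const std::map<std::string, std::shared_ptr<Buffer>>& per_policy,
    const std::shared_ptr<Buffer>& default_data)
{
  auto it = per_policy.find(policy);
  if (it == per_policy.end()) {
    return default_data;  // no policy-specific buffer: use the fallback
  }
  return it->second;
}

int main()
{
  auto default_data = std::make_shared<Buffer>(64);
  std::map<std::string, std::shared_ptr<Buffer>> per_policy{
      {"gpu_0", std::make_shared<Buffer>(64)}};

  std::cout << (DataFor("gpu_0", per_policy, default_data) == default_data)
            << "\n";  // 0: found the policy-specific buffer
  std::cout << (DataFor("cpu", per_policy, default_data) == default_data)
            << "\n";  // 1: fell back to the default buffer
  return 0;
}
```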
- // This class implements the SequenceId type - class SequenceId { - public: - enum class DataType { UINT64, STRING }; - - SequenceId(); - SequenceId(const std::string& sequence_label); - SequenceId(uint64_t sequence_index); - SequenceId& operator=(const SequenceId& rhs) = default; - SequenceId& operator=(const std::string& rhs); - SequenceId& operator=(const uint64_t rhs); - - // Functions that help determine exact type of sequence Id - DataType Type() const { return id_type_; } - bool InSequence() const - { - return ((sequence_label_ != "") || (sequence_index_ != 0)); - } - - // Get the value of the SequenceId based on the type - const std::string& StringValue() const { return sequence_label_; } - uint64_t UnsignedIntValue() const { return sequence_index_; } - - private: - friend std::ostream& operator<<( - std::ostream& out, const InferenceRequest::SequenceId& correlation_id); - friend bool operator==(const SequenceId lhs, const SequenceId rhs); - friend bool operator!=(const SequenceId lhs, const SequenceId rhs); - - std::string sequence_label_; - uint64_t sequence_index_; - DataType id_type_; - }; - - // InferenceRequest - // - // The two constructors are identical except one takes model as a - // shared pointer and the other as a raw pointer. The shared pointer - // version is the primary one and acts to keep the model alive as - // long as the request is in flight. The raw pointer version is used - // only for cases where the model itself is issuing a request - // (e.g. warmup) and no shared pointer version of the model exists - // (because we aren't using shared_from_this). - InferenceRequest( - const std::shared_ptr& model, - const int64_t requested_model_version); - - InferenceRequest(Model* model, const int64_t requested_model_version); - - const std::string& ModelName() const; - int64_t RequestedModelVersion() const { return requested_model_version_; } - int64_t ActualModelVersion() const; - - const std::string& Id() const { return id_; } - void SetId(const std::string& i) { id_ = i; } - // Return string for logging request ID - std::string LogRequest() const - { - std::string id = Id(); - if (id.empty()) { - id = ""; - } - return std::string("[request id: ") + id + "] "; - } - - // Flags for the request, union of TRITONSERVER_RequestFlag. - uint32_t Flags() const { return flags_; } - void SetFlags(uint32_t f) { flags_ = f; } - - const SequenceId& CorrelationId() const { return correlation_id_; } - void SetCorrelationId(const SequenceId& c) { correlation_id_ = c; } - - // The batch size of the request, as understood by Triton. A - // batch-size of 0 indicates that the model doesn't support batching - // in a way that Triton understands. Batch size is not set - // explicitly so there is no setter for it. It is set when the - // request is normalized. - uint32_t BatchSize() const { return batch_size_; } - - uint32_t Priority() const { return priority_; } - void SetPriority(uint32_t p); - - uint64_t TimeoutMicroseconds() const { return timeout_us_; } - void SetTimeoutMicroseconds(uint64_t t) { timeout_us_ = t; } - - uint64_t CacheKey() const { return cache_key_; } - // It is up to the user to update the cache_key_ if modifying any hashable - // fields of the request after cache_key_is_set_ has been set to true. 
- void SetCacheKey(uint64_t key) - { - cache_key_ = key; - cache_key_is_set_ = true; - } - bool CacheKeyIsSet() const { return cache_key_is_set_; } - -#ifdef TRITON_ENABLE_TRACING - const std::shared_ptr& Trace() const { return trace_; } - std::shared_ptr* MutableTrace() { return &trace_; } - void SetTrace(const std::shared_ptr& trace) - { - trace_ = trace; - response_factory_->SetTrace(trace); - } - void ReleaseTrace() - { - trace_ = nullptr; - response_factory_->ReleaseTrace(); - } - - Status TraceInputTensors( - TRITONSERVER_InferenceTraceActivity activity, const std::string& msg); -#endif // TRITON_ENABLE_TRACING - - // The original inputs are the inputs added to the request before - // the inference execution (that is before - // TRITONSERVER_ServerInferAsync is called). Once execution has - // started the original inputs should not be modified until - // execution completes (and those modifications will apply to the - // next inference execution). - Status MutableOriginalInput(const std::string& name, Input** input); - std::unordered_map* MutableOriginalInputs() - { - return &original_inputs_; - } - const std::unordered_map& OriginalInputs() const - { - return original_inputs_; - } - - // The override inputs are the inputs added to the request after - // inference execution has started (that is after - // TRITONSERVER_ServerInferAsync or equivalent is called). During - // inference processing, if Triton needs to change an original input - // it will add an override instead of changing the original. Triton - // will also use an override if it needs to add a new input to the - // request. Overrides are recorded as shared_ptr so that the same - // override can be used efficiently multiple times or even in - // multiple requests simultaneously. Must be careful not to modify - // an override input if it is being shared unless you want that - // change to be reflected in all requests that hold that override - // input. Override inputs within a specific request are not - // persisted across inference calls. - std::unordered_map>* - MutableOverrideInputs() - { - return &override_inputs_; - } - const std::unordered_map>& - OverrideInputs() const - { - return override_inputs_; - } - - // Get an input taking into account both original inputs and - // overrides. If an override input is available use it, otherwise - // use the original input. Accessing inputs via this method is not - // valid until after PrepareForInference is called. - Status ImmutableInput(const std::string& name, const Input** input) const; - const std::unordered_map& ImmutableInputs() const - { - return inputs_; - } - - // The original requested outputs are the requested outputs added to - // the request before the inference execution (that is before - // TRITONSERVER_ServerInferAsync is called). Once execution has - // started the original requested outputs should not be modified - // until execution completes (and those modifications will apply to - // the next inference execution). - const std::set& OriginalRequestedOutputs() const - { - return original_requested_outputs_; - } - - // Get the requested outputs that should be used during - // inference. Accessing outputs via this method is not valid until - // after PrepareForInference is called. - const std::set& ImmutableRequestedOutputs() const - { - return (requested_outputs_.empty()) ? original_requested_outputs_ - : requested_outputs_; - } - - // Get the response factory. 
- const std::shared_ptr& ResponseFactory() const - { - return response_factory_; - } - - // Add an original input to the request. If 'input' is non-null - // return a pointer to the newly added input. - Status AddOriginalInput( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count, Input** input = nullptr); - Status AddOriginalInput( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, Input** input = nullptr); - - // Add an original raw input to the request. If 'input' is non-null - // return a pointer to the newly added input. - Status AddRawInput(const std::string& name, Input** input = nullptr); - - // Remove a single original input or all inputs. - Status RemoveOriginalInput(const std::string& name); - Status RemoveAllOriginalInputs(); - - // Add an override input to the request. If 'input' is non-null - // return a pointer to the newly added input. - // FIXME passing batch size is special handling for backend API. - // For override input, the 'shape' is without batch dimension for - // backends that implemented w/o backend API (which need correct - // input.Shape()), but backend API uses input.ShapeWithBatchDim(). - Status AddOverrideInput( - const std::string& name, const inference::DataType datatype, - const int64_t batch_size, const std::vector& shape, - std::shared_ptr* input = nullptr); - - // Add an override input to the request. - Status AddOverrideInput(const std::shared_ptr& input); - - // Request an original requested output. - Status AddOriginalRequestedOutput(const std::string& name); - - // Remove a single original requested output or all requested - // outputs. - Status RemoveOriginalRequestedOutput(const std::string& name); - Status RemoveAllOriginalRequestedOutputs(); - - // Initialize the release callback for the request. - Status SetReleaseCallback( - TRITONSERVER_InferenceRequestReleaseFn_t release_fn, void* release_userp) - { - release_fn_ = release_fn; - release_userp_ = release_userp; - return Status::Success; - } - - // Initialize the response factory that is to be used with any - // responses produced for this request. - Status SetResponseCallback( - const ResponseAllocator* allocator, void* alloc_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp) - { - response_factory_.reset(new InferenceResponseFactory( - model_shared_, id_, allocator, alloc_userp, response_fn, response_userp, - response_delegator_)); - return Status::Success; - } - - // Returns the preferred memory type and memory type ID of the output buffer - // for the request. 'name' and 'byte_size' are optional and set to nullptr - // if not specified, if provided, they give the allocator more information. - // 'memory_type' and 'memory_type_id' are also used as input to provide types - // preferred by the caller. - // Status::Code::UNAVAILABLE will be returned if output properties are not - // available. - Status OutputBufferProperties( - const char* name, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id); - - // Add a callback to be invoked on releasing the request object from Triton. - // Multile callbacks can be added by calling this function in order, - // and they will be invoked in reversed order. - Status AddInternalReleaseCallback(std::function&& callback) - { - release_callbacks_.emplace_back(std::move(callback)); - return Status::Success; - } - - // Add a delegator to be invoked on sending the responses of this request. 
- // The response will be passed to 'delegator' and 'delegator' must call the - // InferenceResponse::Send() to send the response. - Status SetResponseDelegator( - std::function&&, const uint32_t)>&& delegator) - { - response_delegator_ = std::move(delegator); - return response_factory_->SetResponseDelegator(response_delegator_); - } - - Status SetSequenceStates( - const std::shared_ptr& sequence_states) - { - sequence_states_ = sequence_states; - return Status::Success; - } - - Status LoadInputStates(); - - const std::shared_ptr& GetSequenceStates() const - { - return sequence_states_; - } - - // Prepare this request for inference. - Status PrepareForInference(); - - // Run this inference request using the model associated with the - // request. If Status::Success is returned then the call has taken - // ownership of the request object and so 'request' will be - // nullptr. If non-success is returned then the caller still retains - // ownership of 'request'. - static Status Run(std::unique_ptr& request); - - // Send an error response for this request. If 'status' is Success - // then no response is sent and the request is not released (even if - // 'release_request' is true). Because this is sending an error it - // is assumed that this is the last response for the request and so - // the FINAL flag is set in the response callback. If - // 'release_request' is true then the release callback is called for - // this request and ownership is given to the callback. Thus, if - // 'release_request' is true 'request' is returned as nullptr. - static void RespondIfError( - std::unique_ptr& request, const Status& status, - const bool release_request = false); - - // Send an error response to a set of 'requests'. If 'status' is - // Success then no responses are sent and the requests are not - // released (even if 'release_request' is true). Because this is - // sending an error it is assumed that this is the last response for - // the requests and so the FINAL flag is set in the response - // callbacks. If 'release_request' is true then the release callback - // is called for each request, and the request ownership is given to - // the callback. Thus, if 'release_request' is true 'requests' is - // returned with all nullptrs. - static void RespondIfError( - std::vector>& requests, - const Status& status, const bool release_requests = false); - - // Release the request. Call the release callback and transfer - // ownership of the request to the callback. On return 'request' is - // nullptr. - static void Release( - std::unique_ptr&& request, - const uint32_t release_flags); - - // Create a copy of 'from' suitable for use as a "null" request as - // required for the direct sequence batcher. The returned copy will - // contain only the minimum content required for a null request. - // The statistics of the copy will not be collected. 
- static InferenceRequest* CopyAsNull(const InferenceRequest& from); - - uint64_t QueueStartNs() const { return queue_start_ns_; } - uint64_t CaptureQueueStartNs() - { - queue_start_ns_ = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return queue_start_ns_; - } - - uint64_t CacheLookupStartNs() const { return cache_lookup_start_ns_; } - uint64_t CaptureCacheLookupStartNs() - { - cache_lookup_start_ns_ = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return cache_lookup_start_ns_; - } - - uint64_t CacheLookupEndNs() const { return cache_lookup_end_ns_; } - uint64_t CaptureCacheLookupEndNs() - { - cache_lookup_end_ns_ = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return cache_lookup_end_ns_; - } - - uint64_t CacheInsertionStartNs() const { return cache_insertion_start_ns_; } - uint64_t CaptureCacheInsertionStartNs() - { - cache_insertion_start_ns_ = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return cache_insertion_start_ns_; - } - - uint64_t CacheInsertionEndNs() const { return cache_insertion_end_ns_; } - uint64_t CaptureCacheInsertionEndNs() - { - cache_insertion_end_ns_ = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return cache_insertion_end_ns_; - } - - uint64_t BatcherStartNs() const { return batcher_start_ns_; } - uint64_t CaptureBatcherStartNs() - { - batcher_start_ns_ = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return batcher_start_ns_; - } - -#ifdef TRITON_ENABLE_STATS - uint64_t RequestStartNs() const { return request_start_ns_; } - uint64_t CaptureRequestStartNs() - { - request_start_ns_ = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - return request_start_ns_; - } - - // Report the statistics to stats collectors associated with the request. - // Duration and timestamps provide two granularities for stats collectors. - void ReportStatistics( - MetricModelReporter* metric_reporter, bool success, - const uint64_t compute_start_ns, const uint64_t compute_input_end_ns, - const uint64_t compute_output_start_ns, const uint64_t compute_end_ns); - - // Report the statistics to stats collectors associated with the request. - // Duration and timestamps provide two granularities for stats collectors. - void ReportStatisticsWithDuration( - MetricModelReporter* metric_reporter, bool success, - const uint64_t compute_start_ns, const uint64_t compute_input_duration_ns, - const uint64_t compute_infer_duration_ns, - const uint64_t compute_output_duration_ns); - - // Report the statistics to stats collectors associated with the request on - // response cache hits. - void ReportStatisticsCacheHit(MetricModelReporter* metric_reporter); - - // Report the statistics to stats collectors associated with the request on - // response cache misses and update request duration to include cache - // insertion time. - void ReportStatisticsCacheMiss(MetricModelReporter* metric_reporter); - - // Statistics for each request are aggregated into the corresponding - // model's statistics. Optionally this function may be used to - // add an additional aggregator where statistics are also aggregated. 
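The Capture*Ns() helpers above all follow the same pattern: record std::chrono::steady_clock nanoseconds so later stages can compute durations by subtraction. A minimal standalone sketch of that pattern (illustrative, std::chrono only):

```cpp
// Sketch: monotonic nanosecond timestamps taken at each stage; a
// duration is simply the difference of two captures.
#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

static uint64_t NowNs()
{
  return std::chrono::duration_cast<std::chrono::nanoseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}

int main()
{
  const uint64_t queue_start_ns = NowNs();
  std::this_thread::sleep_for(std::chrono::milliseconds(5));  // pretend work
  const uint64_t compute_start_ns = NowNs();

  std::cout << "queued for " << (compute_start_ns - queue_start_ns)
            << " ns\n";
  return 0;
}
```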
- void SetSecondaryStatsAggregator( - InferenceStatsAggregator* secondary_stats_aggregator) - { - secondary_stats_aggregator_ = secondary_stats_aggregator; - } - -#endif // TRITON_ENABLE_STATS - - private: - DISALLOW_COPY_AND_ASSIGN(InferenceRequest); - friend std::ostream& operator<<( - std::ostream& out, const InferenceRequest& request); - - Status Normalize(); - - // Has anything in the request potentially changed in a way that - // causes normalization to be required when preparing the request - // for inference. - bool needs_normalization_; - - // The model associated with this request. For most requests - // model_shared_ will be non-null and will act to keep the model - // alive as long as this request is live. In this case model_raw_ - // will be the raw pointer from the shared pointer. For cases where - // the model itself created the request (like running requests for - // warmup), model_shared_ will be nullptr, but model_raw_ will - // still be defined. Thus model_raw_ is always defined and should - // always to used to access the model. - std::shared_ptr model_shared_; - Model* model_raw_; - - // The model version as requested and based on version policy the - // specific version that is actually used for inference. - int64_t requested_model_version_; - int64_t actual_model_version_; - - std::string id_; - - uint32_t flags_; - SequenceId correlation_id_; - uint32_t batch_size_; - uint32_t priority_; - uint64_t timeout_us_; - uint64_t cache_key_ = 0; - // Helper to determine if request was successfully hashed - // and cache_key_ field is valid - bool cache_key_is_set_ = false; - - std::unordered_map original_inputs_; - std::unordered_map> override_inputs_; - std::unordered_map inputs_; - std::set original_requested_outputs_; - std::string raw_input_name_; - uint32_t raw_input_size_; - - // requested_outputs_ is to be used post-normalization. It will be - // empty unless it differs from original_requested_outputs_, so - // typically should access it through ImmutableRequestedOutputs. - std::set requested_outputs_; - - // The release function and user pointer for this request. - TRITONSERVER_InferenceRequestReleaseFn_t release_fn_; - void* release_userp_; - - // Additional release callbacks invoked before 'release_fn_'. - std::vector> release_callbacks_; - - // Delegator to be invoked on sending responses. - std::function&&, const uint32_t)> - response_delegator_; - - // The response factory associated with this request. - std::shared_ptr response_factory_; - - // Request timestamps. Queue start is needed for schedulers even - // when statistics are not being collected. - uint64_t queue_start_ns_; - - // Cache lookup start/end timestamps. Cache manages its own stats even - // when statistics are not being colleceted. - uint64_t cache_lookup_start_ns_; - uint64_t cache_lookup_end_ns_; - - // Cache insertion start/end timestamps. Cache manages its own stats even - // when statistics are not being colleceted. - uint64_t cache_insertion_start_ns_; - uint64_t cache_insertion_end_ns_; - - // Dedicated timestamp for batcher internal which can diverge from - // queue start timestamp to provide accurate queue time without affecting - // batcher functionalities. - uint64_t batcher_start_ns_; - - // Whether the stats of the request should be collected. 
- bool collect_stats_; - -#ifdef TRITON_ENABLE_STATS - uint64_t request_start_ns_; - InferenceStatsAggregator* secondary_stats_aggregator_ = nullptr; -#endif // TRITON_ENABLE_STATS - -#ifdef TRITON_ENABLE_TRACING - // Inference trace associated with this request. - std::shared_ptr trace_; -#endif // TRITON_ENABLE_TRACING - - // Sequence I/O states used for implicit state. - std::shared_ptr sequence_states_; -}; - -std::ostream& operator<<(std::ostream& out, const InferenceRequest& request); -std::ostream& operator<<( - std::ostream& out, const InferenceRequest::Input& input); -std::ostream& operator<<( - std::ostream& out, const InferenceRequest::SequenceId& sequence_id); -bool operator==( - const InferenceRequest::SequenceId lhs, - const InferenceRequest::SequenceId rhs); -}} // namespace triton::core - -namespace std { -using namespace triton::core; -template <> -class hash { - public: - size_t operator()(const InferenceRequest::SequenceId& sequence_id) const - { - if (sequence_id.Type() == InferenceRequest::SequenceId::DataType::STRING) { - return std::hash{}(sequence_id.StringValue()); - } - return std::hash{}(sequence_id.UnsignedIntValue()); - } -}; -} // namespace std diff --git a/3rdparty/core-r22.12/src/infer_response.cc b/3rdparty/core-r22.12/src/infer_response.cc deleted file mode 100644 index 2a8f2af2ec617149c31daa4566f632dbfb3b9ca7..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_response.cc +++ /dev/null @@ -1,431 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
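The std::hash specialization at the end of infer_request.h above dispatches on whether the sequence id holds a string label or a 64-bit index. A standalone sketch of the same string-or-integer id pattern with a matching hash, using a hypothetical Id type so it can key an unordered_map:

```cpp
// Sketch: a tagged id that is either a string or a uint64, with equality
// and hashing that follow whichever member is active.
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

struct Id {
  enum class Type { UINT64, STRING } type;
  std::string label;
  uint64_t index;

  bool operator==(const Id& rhs) const
  {
    if (type != rhs.type) {
      return false;
    }
    return (type == Type::STRING) ? label == rhs.label : index == rhs.index;
  }
};

namespace std {
template <>
struct hash<Id> {
  size_t operator()(const Id& id) const
  {
    return (id.type == Id::Type::STRING) ? hash<string>{}(id.label)
                                         : hash<uint64_t>{}(id.index);
  }
};
}  // namespace std

int main()
{
  std::unordered_map<Id, int> slots;
  slots[Id{Id::Type::UINT64, "", 42}] = 1;
  slots[Id{Id::Type::STRING, "session-a", 0}] = 2;
  std::cout << slots.size() << "\n";  // 2
  return 0;
}
```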
- -#include "infer_response.h" - -#include "model.h" -#include "model_config_utils.h" -#include "server.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -// -// InferenceResponseFactory -// -Status -InferenceResponseFactory::CreateResponse( - std::unique_ptr* response) const -{ - response->reset(new InferenceResponse( - model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_, - response_delegator_)); -#ifdef TRITON_ENABLE_TRACING - (*response)->SetTrace(trace_); -#endif // TRITON_ENABLE_TRACING - return Status::Success; -} - -Status -InferenceResponseFactory::SendFlags(const uint32_t flags) const -{ - if (response_delegator_ != nullptr) { - std::unique_ptr response( - new InferenceResponse(response_fn_, response_userp_)); - response_delegator_(std::move(response), flags); - } else { - void* userp = response_userp_; - response_fn_(nullptr /* response */, flags, userp); - } - return Status::Success; -} - -// -// InferenceResponse -// -InferenceResponse::InferenceResponse( - const std::shared_ptr& model, const std::string& id, - const ResponseAllocator* allocator, void* alloc_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp, - const std::function< - void(std::unique_ptr&&, const uint32_t)>& delegator) - : model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp), - response_fn_(response_fn), response_userp_(response_userp), - response_delegator_(delegator), null_response_(false) -{ - // If the allocator has a start_fn then invoke it. - TRITONSERVER_ResponseAllocatorStartFn_t start_fn = allocator_->StartFn(); - if (start_fn != nullptr) { - LOG_TRITONSERVER_ERROR( - start_fn( - reinterpret_cast( - const_cast(allocator_)), - alloc_userp_), - "response allocation start failed"); - } -} - -InferenceResponse::InferenceResponse( - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp) - : response_fn_(response_fn), response_userp_(response_userp), - null_response_(true) -{ -} - -const std::string& -InferenceResponse::ModelName() const -{ - static const std::string unknown(""); - return (model_ == nullptr) ? unknown : model_->Name(); -} - -int64_t -InferenceResponse::ActualModelVersion() const -{ - return (model_ == nullptr) ? 
-1 : model_->Version(); -} - -Status -InferenceResponse::AddParameter(const char* name, const char* value) -{ - parameters_.emplace_back(name, value); - return Status::Success; -} - -Status -InferenceResponse::AddParameter(const char* name, const int64_t value) -{ - parameters_.emplace_back(name, value); - return Status::Success; -} - -Status -InferenceResponse::AddParameter(const char* name, const bool value) -{ - parameters_.emplace_back(name, value); - return Status::Success; -} - -Status -InferenceResponse::AddOutput( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, InferenceResponse::Output** output) -{ - outputs_.emplace_back(name, datatype, shape, allocator_, alloc_userp_); - - LOG_VERBOSE(1) << "add response output: " << outputs_.back(); - - if (model_ != nullptr) { - const inference::ModelOutput* output_config; - RETURN_IF_ERROR(model_->GetOutput(name, &output_config)); - if (output_config->has_reshape()) { - const bool has_batch_dim = (model_->Config().max_batch_size() > 0); - outputs_.back().Reshape(has_batch_dim, output_config); - } - } - - if (output != nullptr) { - *output = std::addressof(outputs_.back()); - } - - return Status::Success; -} - -Status -InferenceResponse::AddOutput( - const std::string& name, const inference::DataType datatype, - std::vector&& shape, InferenceResponse::Output** output) -{ - outputs_.emplace_back( - name, datatype, std::move(shape), allocator_, alloc_userp_); - - LOG_VERBOSE(1) << "add response output: " << outputs_.back(); - - if (model_ != nullptr) { - const inference::ModelOutput* output_config; - RETURN_IF_ERROR(model_->GetOutput(name, &output_config)); - if (output_config->has_reshape()) { - const bool has_batch_dim = (model_->Config().max_batch_size() > 0); - outputs_.back().Reshape(has_batch_dim, output_config); - } - } - - if (output != nullptr) { - *output = std::addressof(outputs_.back()); - } - - return Status::Success; -} - -Status -InferenceResponse::ClassificationLabel( - const InferenceResponse::Output& output, const uint32_t class_index, - const char** label) const -{ - const auto& label_provider = model_->GetLabelProvider(); - const std::string& l = label_provider->GetLabel(output.Name(), class_index); - if (l.empty()) { - *label = nullptr; - } else { - *label = l.c_str(); - } - - return Status::Success; -} - -Status -InferenceResponse::Send( - std::unique_ptr&& response, const uint32_t flags) -{ -#ifdef TRITON_ENABLE_TRACING - response->TraceOutputTensors( - TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT, "InferenceResponse Send"); -#endif // TRITON_ENABLE_TRACING - - if (response->response_delegator_ != nullptr) { - auto ldelegator = std::move(response->response_delegator_); - ldelegator(std::move(response), flags); - return Status::Success; - } - void* userp = response->response_userp_; - if (response->null_response_) { - response->response_fn_(nullptr /* response */, flags, userp); - } else { - auto& response_fn = response->response_fn_; - response_fn( - reinterpret_cast(response.release()), - flags, userp); - } - return Status::Success; -} - -Status -InferenceResponse::SendWithStatus( - std::unique_ptr&& response, const uint32_t flags, - const Status& status) -{ - response->status_ = status; - return InferenceResponse::Send(std::move(response), flags); -} - -#ifdef TRITON_ENABLE_TRACING -Status -InferenceResponse::TraceOutputTensors( - TRITONSERVER_InferenceTraceActivity activity, const std::string& msg) -{ - const auto& outputs = this->Outputs(); - uint32_t output_count = 
outputs.size(); - - for (uint32_t idx = 0; idx < output_count; ++idx) { - const Output& output = outputs[idx]; - - // output data - const char* cname = output.Name().c_str(); - TRITONSERVER_DataType datatype = DataTypeToTriton(output.DType()); - const std::vector& oshape = output.Shape(); - const int64_t* shape = &oshape[0]; - uint64_t dim_count = oshape.size(); - const void* base; - size_t byte_size; - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - void* userp; - - Status status = output.DataBuffer( - &base, &byte_size, &memory_type, &memory_type_id, &userp); - if (!status.IsOk()) { - LOG_STATUS_ERROR( - status, - std::string(TRITONSERVER_InferenceTraceActivityString(activity)) + - ": " + msg + ": fail to get data buffer: " + status.Message()); - return status; - } - - INFER_TRACE_TENSOR_ACTIVITY( - this->trace_, activity, cname, datatype, base, byte_size, shape, - dim_count, memory_type, memory_type_id); - } - - return Status::Success; -} -#endif // TRITON_ENABLE_TRACING - -// -// InferenceResponse::Output -// -InferenceResponse::Output::~Output() -{ - Status status = ReleaseDataBuffer(); - if (!status.IsOk()) { - LOG_ERROR << "failed to release buffer for output '" << name_ - << "': " << status.AsString(); - } -} - -void -InferenceResponse::Output::Reshape( - const bool has_batch_dim, const inference::ModelOutput* output_config) -{ - std::deque variable_size_values; - - const int64_t batch_dim = - (has_batch_dim && (shape_.size() > 0)) ? shape_[0] : -1; - const size_t batch_dim_offset = (has_batch_dim) ? 1 : 0; - - const auto& from_shape = output_config->reshape().shape(); - const auto& to_shape = output_config->dims(); - for (int64_t idx = 0; idx < from_shape.size(); idx++) { - if (from_shape[idx] == -1) { - variable_size_values.push_back(shape_[idx + batch_dim_offset]); - } - } - - shape_.clear(); - if (batch_dim >= 0) { - shape_.push_back(batch_dim); - } - - for (const auto& dim : to_shape) { - if (dim == -1) { - shape_.push_back(variable_size_values.front()); - variable_size_values.pop_front(); - } else { - shape_.push_back(dim); - } - } -} - -Status -InferenceResponse::Output::DataBuffer( - const void** buffer, size_t* buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, - void** userp) const -{ - *buffer = allocated_buffer_; - *buffer_byte_size = buffer_attributes_.ByteSize(); - *memory_type = buffer_attributes_.MemoryType(); - *memory_type_id = buffer_attributes_.MemoryTypeId(); - *userp = allocated_userp_; - return Status::Success; -} - -Status -InferenceResponse::Output::AllocateDataBuffer( - void** buffer, size_t buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - if (allocated_buffer_ != nullptr) { - return Status( - Status::Code::ALREADY_EXISTS, - "allocated buffer for output '" + name_ + "' already exists"); - } - - TRITONSERVER_MemoryType actual_memory_type = *memory_type; - int64_t actual_memory_type_id = *memory_type_id; - void* alloc_buffer_userp = nullptr; - - RETURN_IF_TRITONSERVER_ERROR(allocator_->AllocFn()( - reinterpret_cast( - const_cast(allocator_)), - name_.c_str(), buffer_byte_size, *memory_type, *memory_type_id, - alloc_userp_, buffer, &alloc_buffer_userp, &actual_memory_type, - &actual_memory_type_id)); - - // Only call the buffer attributes API if it is set. 
- if (allocator_->BufferAttributesFn() != nullptr) { - RETURN_IF_TRITONSERVER_ERROR(allocator_->BufferAttributesFn()( - reinterpret_cast( - const_cast(allocator_)), - name_.c_str(), - reinterpret_cast(&buffer_attributes_), - alloc_userp_, alloc_buffer_userp)); - } - - allocated_buffer_ = *buffer; - buffer_attributes_.SetByteSize(buffer_byte_size); - buffer_attributes_.SetMemoryType(actual_memory_type); - buffer_attributes_.SetMemoryTypeId(actual_memory_type_id); - - allocated_userp_ = alloc_buffer_userp; - *memory_type = actual_memory_type; - *memory_type_id = actual_memory_type_id; - - return Status::Success; -} - -Status -InferenceResponse::Output::ReleaseDataBuffer() -{ - TRITONSERVER_Error* err = nullptr; - - if (allocated_buffer_ != nullptr) { - err = allocator_->ReleaseFn()( - reinterpret_cast( - const_cast(allocator_)), - allocated_buffer_, allocated_userp_, buffer_attributes_.ByteSize(), - buffer_attributes_.MemoryType(), buffer_attributes_.MemoryTypeId()); - } - - allocated_buffer_ = nullptr; - buffer_attributes_.SetByteSize(0); - buffer_attributes_.SetMemoryType(TRITONSERVER_MEMORY_CPU); - buffer_attributes_.SetMemoryTypeId(0); - allocated_userp_ = nullptr; - - RETURN_IF_TRITONSERVER_ERROR(err); - - return Status::Success; -} - -std::ostream& -operator<<(std::ostream& out, const InferenceResponse& response) -{ - out << "[0x" << std::addressof(response) << "] " - << "response id: " << response.Id() << ", model: " << response.ModelName() - << ", actual version: " << response.ActualModelVersion() << std::endl; - - out << "status:" << response.ResponseStatus().AsString() << std::endl; - - out << "outputs:" << std::endl; - for (const auto& output : response.Outputs()) { - out << "[0x" << std::addressof(output) << "] " << output << std::endl; - } - - return out; -} - -std::ostream& -operator<<(std::ostream& out, const InferenceResponse::Output& output) -{ - out << "output: " << output.Name() - << ", type: " << triton::common::DataTypeToProtocolString(output.DType()) - << ", shape: " << triton::common::DimsListToString(output.Shape()); - return out; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_response.h b/3rdparty/core-r22.12/src/infer_response.h deleted file mode 100644 index 783641558db643388bd06d5db665ceb4d4395980..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_response.h +++ /dev/null @@ -1,351 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include "buffer_attributes.h" -#include "constants.h" -#include "infer_parameter.h" -#include "infer_trace.h" -#include "response_allocator.h" -#include "status.h" -#include "triton/common/model_config.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -class Model; -class InferenceResponse; -// -// An inference response factory. -// -class InferenceResponseFactory { - public: - InferenceResponseFactory() = default; - - InferenceResponseFactory( - const std::shared_ptr& model, const std::string& id, - const ResponseAllocator* allocator, void* alloc_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp, - const std::function&&, const uint32_t)>& delegator) - : model_(model), id_(id), allocator_(allocator), - alloc_userp_(alloc_userp), response_fn_(response_fn), - response_userp_(response_userp), response_delegator_(delegator) - { - } - - const ResponseAllocator* Allocator() { return allocator_; } - void* AllocatorUserp() { return alloc_userp_; } - - Status SetResponseDelegator( - const std::function&&, const uint32_t)>& delegator) - { - response_delegator_ = delegator; - return Status::Success; - } - - // Create a new response. - Status CreateResponse(std::unique_ptr* response) const; - - // Send a "null" response with 'flags'. - Status SendFlags(const uint32_t flags) const; - -#ifdef TRITON_ENABLE_TRACING - const std::shared_ptr& Trace() const { return trace_; } - void SetTrace(const std::shared_ptr& trace) - { - trace_ = trace; - } - void ReleaseTrace() { trace_ = nullptr; } -#endif // TRITON_ENABLE_TRACING - - private: - // The model associated with this factory. For normal - // requests/responses this will always be defined and acts to keep - // the model loaded as long as this factory is live. It may be - // nullptr for cases where the model itself created the request - // (like running requests for warmup) and so must protect any uses - // to handle the nullptr case. - std::shared_ptr model_; - - // The ID of the corresponding request that should be included in every - // response. This is a property that can be optionally provided by the user. - std::string id_; - - // The response allocator and user pointer. The 'allocator_' is a - // raw pointer because it is owned by the client, and the client is - // responsible for ensuring that the lifetime of the allocator - // extends longer that any request or response that depend on the - // allocator. - const ResponseAllocator* allocator_; - void* alloc_userp_; - - // The response callback function and user pointer. - TRITONSERVER_InferenceResponseCompleteFn_t response_fn_; - void* response_userp_; - - // Delegator to be invoked on sending responses. - std::function&&, const uint32_t)> - response_delegator_; - - -#ifdef TRITON_ENABLE_TRACING - // Inference trace associated with this response. - std::shared_ptr trace_; -#endif // TRITON_ENABLE_TRACING -}; - -// -// An inference response. 
-//
-class InferenceResponse {
- public:
-  // Output tensor
-  class Output {
-   public:
-    Output(
-        const std::string& name, const inference::DataType datatype,
-        const std::vector<int64_t>& shape, const ResponseAllocator* allocator,
-        void* alloc_userp)
-        : name_(name), datatype_(datatype), shape_(shape),
-          allocator_(allocator), alloc_userp_(alloc_userp),
-          allocated_buffer_(nullptr)
-    {
-    }
-    Output(
-        const std::string& name, const inference::DataType datatype,
-        std::vector<int64_t>&& shape, const ResponseAllocator* allocator,
-        void* alloc_userp)
-        : name_(name), datatype_(datatype), shape_(std::move(shape)),
-          allocator_(allocator), alloc_userp_(alloc_userp),
-          allocated_buffer_(nullptr)
-    {
-    }
-
-    ~Output();
-
-    // The name of the output tensor.
-    const std::string& Name() const { return name_; }
-
-    // Data type of the output tensor.
-    inference::DataType DType() const { return datatype_; }
-
-    // The shape of the output tensor.
-    const std::vector<int64_t>& Shape() const { return shape_; }
-
-    BufferAttributes* GetBufferAttributes() { return &buffer_attributes_; }
-
-    // Reshape the output tensor. This function must only be called
-    // for outputs that have reshape specified in the model
-    // configuration.
-    void Reshape(
-        const bool has_batch_dim, const inference::ModelOutput* output_config);
-
-    // Get information about the buffer allocated for this output
-    // tensor's data. If no buffer is allocated 'buffer' will return
-    // nullptr and the other returned values will be undefined.
-    Status DataBuffer(
-        const void** buffer, size_t* buffer_byte_size,
-        TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
-        void** userp) const;
-
-    // Allocate the buffer that should be used for this output
-    // tensor's data. 'buffer' must return a buffer of size
-    // 'buffer_byte_size'. 'memory_type' acts as both input and
-    // output. On input gives the buffer memory type preferred by the
-    // caller and on return holds the actual memory type of
-    // 'buffer'. 'memory_type_id' acts as both input and output. On
-    // input gives the buffer memory type id preferred by the caller
-    // and returns the actual memory type id of 'buffer'. Only a
-    // single buffer may be allocated for the output at any time, so
-    // multiple calls to AllocateDataBuffer without an intervening
-    // ReleaseDataBuffer call will result in an error.
-    Status AllocateDataBuffer(
-        void** buffer, const size_t buffer_byte_size,
-        TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id);
-
-    // Release the buffer that was previously allocated by
-    // AllocateDataBuffer(). Do nothing if AllocateDataBuffer() has
-    // not been called.
-    Status ReleaseDataBuffer();
-
-   private:
-    DISALLOW_COPY_AND_ASSIGN(Output);
-    friend std::ostream& operator<<(
-        std::ostream& out, const InferenceResponse::Output& output);
-
-    std::string name_;
-    inference::DataType datatype_;
-    std::vector<int64_t> shape_;
-
-    // The response allocator and user pointer.
-    const ResponseAllocator* allocator_;
-    void* alloc_userp_;
-
-    // Information about the buffer allocated by
-    // AllocateDataBuffer(). This information is needed by
-    // DataBuffer() and ReleaseDataBuffer().
- void* allocated_buffer_; - BufferAttributes buffer_attributes_; - void* allocated_userp_; - }; - - // InferenceResponse - InferenceResponse( - const std::shared_ptr& model, const std::string& id, - const ResponseAllocator* allocator, void* alloc_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp, - const std::function&&, const uint32_t)>& delegator); - - // "null" InferenceResponse is a special instance of InferenceResponse which - // contains minimal information for calling InferenceResponse::Send, - // InferenceResponse::NullResponse. nullptr will be passed as response in - // 'response_fn'. - InferenceResponse( - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp); - - const std::string& Id() const { return id_; } - const std::string& ModelName() const; - int64_t ActualModelVersion() const; - const Status& ResponseStatus() const { return status_; } - - // The response parameters. - const std::deque& Parameters() const - { - return parameters_; - } - - // Add an parameter to the response. - Status AddParameter(const char* name, const char* value); - Status AddParameter(const char* name, const int64_t value); - Status AddParameter(const char* name, const bool value); - - // The response outputs. - const std::deque& Outputs() const { return outputs_; } - - // Add an output to the response. If 'output' is non-null - // return a pointer to the newly added output. - Status AddOutput( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, Output** output = nullptr); - Status AddOutput( - const std::string& name, const inference::DataType datatype, - std::vector&& shape, Output** output = nullptr); - - // Get the classification label associated with an output. Return - // 'label' == nullptr if no label. - Status ClassificationLabel( - const Output& output, const uint32_t class_index, - const char** label) const; - - // Send the response with success status. Calling this function - // releases ownership of the response object and gives it to the - // callback function. - static Status Send( - std::unique_ptr&& response, const uint32_t flags); - - // Send the response with explicit status. Calling this function - // releases ownership of the response object and gives it to the - // callback function. - static Status SendWithStatus( - std::unique_ptr&& response, const uint32_t flags, - const Status& status); - -#ifdef TRITON_ENABLE_TRACING - const std::shared_ptr& Trace() const { return trace_; } - void SetTrace(const std::shared_ptr& trace) - { - trace_ = trace; - } - void ReleaseTrace() { trace_ = nullptr; } -#endif // TRITON_ENABLE_TRACING - - private: - DISALLOW_COPY_AND_ASSIGN(InferenceResponse); - friend std::ostream& operator<<( - std::ostream& out, const InferenceResponse& response); - -#ifdef TRITON_ENABLE_TRACING - Status TraceOutputTensors( - TRITONSERVER_InferenceTraceActivity activity, const std::string& msg); -#endif // TRITON_ENABLE_TRACING - - // The model associated with this factory. For normal - // requests/responses this will always be defined and acts to keep - // the model loaded as long as this factory is live. It may be - // nullptr for cases where the model itself created the request - // (like running requests for warmup) and so must protect any uses - // to handle the nullptr case. - std::shared_ptr model_; - - // The ID of the corresponding request that should be included in - // every response. - std::string id_; - - // Error status for the response. 
- Status status_; - - // The parameters of the response. Use a deque so that there is no - // reallocation. - std::deque parameters_; - - // The result tensors. Use a deque so that there is no reallocation. - std::deque outputs_; - - // The response allocator and user pointer. - const ResponseAllocator* allocator_; - void* alloc_userp_; - - // The response callback function and user pointer. - TRITONSERVER_InferenceResponseCompleteFn_t response_fn_; - void* response_userp_; - - // Delegator to be invoked on sending responses. - std::function&&, const uint32_t)> - response_delegator_; - - bool null_response_; - -#ifdef TRITON_ENABLE_TRACING - // Inference trace associated with this response. - std::shared_ptr trace_; -#endif // TRITON_ENABLE_TRACING -}; - -std::ostream& operator<<(std::ostream& out, const InferenceResponse& response); -std::ostream& operator<<( - std::ostream& out, const InferenceResponse::Output& output); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_stats.cc b/3rdparty/core-r22.12/src/infer_stats.cc deleted file mode 100644 index 1d33a1898c15896667e3e9603d1928397c32e166..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_stats.cc +++ /dev/null @@ -1,241 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
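For context on the statistics code that follows: `InferenceStatsAggregator::UpdateSuccess` in this file derives per-phase durations from raw timestamps before accumulating them. A small standalone sketch of that arithmetic is below; the struct and function names are illustrative stand-ins, not the Triton types.

```cpp
#include <cstdint>
#include <iostream>

// Per-phase durations derived from the request's timestamps.
struct SuccessDurations {
  uint64_t request_ns;
  uint64_t queue_ns;
  uint64_t compute_input_ns;
  uint64_t compute_infer_ns;
  uint64_t compute_output_ns;
};

SuccessDurations ComputeDurations(
    uint64_t request_start_ns, uint64_t queue_start_ns,
    uint64_t compute_start_ns, uint64_t compute_input_end_ns,
    uint64_t compute_output_start_ns, uint64_t compute_end_ns,
    uint64_t request_end_ns) {
  return SuccessDurations{
      request_end_ns - request_start_ns,               // whole request
      compute_start_ns - queue_start_ns,               // time spent queued
      compute_input_end_ns - compute_start_ns,         // input preprocessing
      compute_output_start_ns - compute_input_end_ns,  // model execution
      compute_end_ns - compute_output_start_ns};       // output postprocessing
}

int main() {
  // Timestamps in nanoseconds (fabricated for illustration only).
  auto d = ComputeDurations(0, 10, 50, 60, 160, 170, 200);
  std::cout << "queue=" << d.queue_ns << "ns infer=" << d.compute_infer_ns
            << "ns\n";  // queue=40ns infer=100ns
}
```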
- -#include "infer_stats.h" - -#include -#include "metric_model_reporter.h" -#include "metrics.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -#ifdef TRITON_ENABLE_STATS - -void -InferenceStatsAggregator::UpdateFailure( - MetricModelReporter* metric_reporter, const uint64_t request_start_ns, - const uint64_t request_end_ns) -{ - std::lock_guard lock(mu_); - - infer_stats_.failure_count_++; - infer_stats_.failure_duration_ns_ += (request_end_ns - request_start_ns); - -#ifdef TRITON_ENABLE_METRICS - if (metric_reporter != nullptr) { - metric_reporter->MetricInferenceFailure().Increment(1); - } -#endif // TRITON_ENABLE_METRICS -} - -void -InferenceStatsAggregator::UpdateSuccess( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t request_start_ns, const uint64_t queue_start_ns, - const uint64_t compute_start_ns, const uint64_t compute_input_end_ns, - const uint64_t compute_output_start_ns, const uint64_t compute_end_ns, - const uint64_t request_end_ns) -{ - const uint64_t compute_input_duration_ns = - compute_input_end_ns - compute_start_ns; - const uint64_t compute_infer_duration_ns = - compute_output_start_ns - compute_input_end_ns; - const uint64_t compute_output_duration_ns = - compute_end_ns - compute_output_start_ns; - UpdateSuccessWithDuration( - metric_reporter, batch_size, request_start_ns, queue_start_ns, - compute_start_ns, request_end_ns, compute_input_duration_ns, - compute_infer_duration_ns, compute_output_duration_ns); -} - -void -InferenceStatsAggregator::UpdateSuccessWithDuration( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t request_start_ns, const uint64_t queue_start_ns, - const uint64_t compute_start_ns, const uint64_t request_end_ns, - const uint64_t compute_input_duration_ns, - const uint64_t compute_infer_duration_ns, - const uint64_t compute_output_duration_ns) -{ - const uint64_t request_duration_ns = request_end_ns - request_start_ns; - const uint64_t queue_duration_ns = compute_start_ns - queue_start_ns; - - std::lock_guard lock(mu_); - - inference_count_ += batch_size; - - infer_stats_.success_count_++; - infer_stats_.request_duration_ns_ += request_duration_ns; - infer_stats_.queue_duration_ns_ += queue_duration_ns; - infer_stats_.compute_input_duration_ns_ += compute_input_duration_ns; - infer_stats_.compute_infer_duration_ns_ += compute_infer_duration_ns; - infer_stats_.compute_output_duration_ns_ += compute_output_duration_ns; - -#ifdef TRITON_ENABLE_METRICS - if (metric_reporter != nullptr) { - metric_reporter->MetricInferenceSuccess().Increment(1); - metric_reporter->MetricInferenceCount().Increment(batch_size); - metric_reporter->MetricInferenceRequestDuration().Increment( - request_duration_ns / 1000); - metric_reporter->MetricInferenceQueueDuration().Increment( - queue_duration_ns / 1000); - metric_reporter->MetricInferenceComputeInputDuration().Increment( - compute_input_duration_ns / 1000); - metric_reporter->MetricInferenceComputeInferDuration().Increment( - compute_infer_duration_ns / 1000); - metric_reporter->MetricInferenceComputeOutputDuration().Increment( - compute_output_duration_ns / 1000); - } -#endif // TRITON_ENABLE_METRICS -} - -// Currently cache hits will not go to the inference backend where metrics -// are typically updated, so this method allows us to update relevant metrics -// from a metric reporter rather than going through the backend. 
-void -InferenceStatsAggregator::UpdateSuccessCacheHit( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t request_start_ns, const uint64_t queue_start_ns, - const uint64_t cache_lookup_start_ns, const uint64_t request_end_ns, - const uint64_t cache_hit_lookup_duration_ns) -{ - const uint64_t request_duration_ns = request_end_ns - request_start_ns; - const uint64_t queue_duration_ns = cache_lookup_start_ns - queue_start_ns; - - std::lock_guard lock(mu_); - - infer_stats_.success_count_++; - infer_stats_.request_duration_ns_ += request_duration_ns; - infer_stats_.queue_duration_ns_ += queue_duration_ns; - infer_stats_.cache_hit_count_++; - infer_stats_.cache_hit_lookup_duration_ns_ += cache_hit_lookup_duration_ns; - -#ifdef TRITON_ENABLE_METRICS - if (metric_reporter != nullptr) { - metric_reporter->MetricInferenceSuccess().Increment(1); - metric_reporter->MetricInferenceRequestDuration().Increment( - request_duration_ns / 1000); - metric_reporter->MetricInferenceQueueDuration().Increment( - queue_duration_ns / 1000); - metric_reporter->MetricCacheHitCount().Increment(1); - metric_reporter->MetricCacheHitLookupDuration().Increment( - cache_hit_lookup_duration_ns / 1000); - } -#endif // TRITON_ENABLE_METRICS -} - -// Cache misses will go to the inference backend where metrics are typically -// updated, but cache insertion happens after the inference backend finishes. -// So we use this method to update cache miss stats and adjust the request -// duration to include cache insertion time. -void -InferenceStatsAggregator::UpdateSuccessCacheMiss( - MetricModelReporter* metric_reporter, - const uint64_t cache_miss_lookup_duration_ns, - const uint64_t cache_miss_insertion_duration_ns) -{ - std::lock_guard lock(mu_); - - const uint64_t cache_miss_duration_ns = - cache_miss_lookup_duration_ns + cache_miss_insertion_duration_ns; - infer_stats_.request_duration_ns_ += cache_miss_duration_ns; - infer_stats_.cache_miss_count_++; - infer_stats_.cache_miss_lookup_duration_ns_ += cache_miss_lookup_duration_ns; - infer_stats_.cache_miss_insertion_duration_ns_ += - cache_miss_insertion_duration_ns; - -#ifdef TRITON_ENABLE_METRICS - if (metric_reporter != nullptr) { - // Add cache insertion time to request duration since insertion - // happens after inference backend sets the request duration, and - // cache lookup time was already included before the inference backend - // was called - metric_reporter->MetricInferenceRequestDuration().Increment( - cache_miss_duration_ns / 1000); - metric_reporter->MetricCacheMissCount().Increment(1); - metric_reporter->MetricCacheMissLookupDuration().Increment( - cache_miss_lookup_duration_ns / 1000); - metric_reporter->MetricCacheMissInsertionDuration().Increment( - cache_miss_insertion_duration_ns / 1000); - } -#endif // TRITON_ENABLE_METRICS -} - -void -InferenceStatsAggregator::UpdateInferBatchStats( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t compute_start_ns, const uint64_t compute_input_end_ns, - const uint64_t compute_output_start_ns, const uint64_t compute_end_ns) -{ - auto compute_input_duration_ns = (compute_input_end_ns - compute_start_ns); - auto compute_infer_duration_ns = - (compute_output_start_ns - compute_input_end_ns); - auto compute_output_duration_ns = (compute_end_ns - compute_output_start_ns); - UpdateInferBatchStatsWithDuration( - metric_reporter, batch_size, compute_input_duration_ns, - compute_infer_duration_ns, compute_output_duration_ns); -} - -void 
-InferenceStatsAggregator::UpdateInferBatchStatsWithDuration( - MetricModelReporter* metric_reporter, size_t batch_size, - const uint64_t compute_input_duration_ns, - const uint64_t compute_infer_duration_ns, - const uint64_t compute_output_duration_ns) -{ - uint64_t inference_ms = - std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - - std::lock_guard lock(mu_); - - if (inference_ms > last_inference_ms_) { - last_inference_ms_ = inference_ms; - } - - execution_count_++; - - auto it = batch_stats_.find(batch_size); - if (it == batch_stats_.end()) { - it = batch_stats_.emplace(batch_size, InferBatchStats()).first; - } - it->second.count_++; - it->second.compute_input_duration_ns_ += compute_input_duration_ns; - it->second.compute_infer_duration_ns_ += compute_infer_duration_ns; - it->second.compute_output_duration_ns_ += compute_output_duration_ns; - -#ifdef TRITON_ENABLE_METRICS - if (metric_reporter != nullptr) { - metric_reporter->MetricInferenceExecutionCount().Increment(1); - } -#endif // TRITON_ENABLE_METRICS -} - -#endif // TRITON_ENABLE_STATS - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_stats.h b/3rdparty/core-r22.12/src/infer_stats.h deleted file mode 100644 index b5e3be8429dc20b986db0a52f0760f15b2b96bfb..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_stats.h +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include "constants.h" -#include "infer_response.h" -#include "status.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -class MetricModelReporter; - - -// -// InferenceStatsAggregator -// -// A statistics aggregator. 
-// -class InferenceStatsAggregator { -#ifdef TRITON_ENABLE_STATS - public: - struct InferStats { - InferStats() - : failure_count_(0), failure_duration_ns_(0), success_count_(0), - request_duration_ns_(0), queue_duration_ns_(0), - compute_input_duration_ns_(0), compute_infer_duration_ns_(0), - compute_output_duration_ns_(0), cache_hit_count_(0), - cache_hit_lookup_duration_ns_(0), cache_miss_count_(0), - cache_miss_lookup_duration_ns_(0), - cache_miss_insertion_duration_ns_(0) - { - } - uint64_t failure_count_; - uint64_t failure_duration_ns_; - - uint64_t success_count_; - uint64_t request_duration_ns_; - uint64_t queue_duration_ns_; - uint64_t compute_input_duration_ns_; - uint64_t compute_infer_duration_ns_; - uint64_t compute_output_duration_ns_; - - // Cache hit stats - uint64_t cache_hit_count_; - uint64_t cache_hit_lookup_duration_ns_; - // Cache miss stats - uint64_t cache_miss_count_; - uint64_t cache_miss_lookup_duration_ns_; - uint64_t cache_miss_insertion_duration_ns_; - }; - - struct InferBatchStats { - InferBatchStats() - : count_(0), compute_input_duration_ns_(0), - compute_infer_duration_ns_(0), compute_output_duration_ns_(0) - { - } - uint64_t count_; - uint64_t compute_input_duration_ns_; - uint64_t compute_infer_duration_ns_; - uint64_t compute_output_duration_ns_; - }; - - // Create an aggregator for model statistics - InferenceStatsAggregator() - : last_inference_ms_(0), inference_count_(0), execution_count_(0) - { - } - - uint64_t LastInferenceMs() const { return last_inference_ms_; } - uint64_t InferenceCount() const { return inference_count_; } - uint64_t ExecutionCount() const { return execution_count_; } - const InferStats& ImmutableInferStats() const { return infer_stats_; } - const std::map& ImmutableInferBatchStats() const - { - return batch_stats_; - } - - // Add durations to Infer stats for a failed inference request. - void UpdateFailure( - MetricModelReporter* metric_reporter, const uint64_t request_start_ns, - const uint64_t request_end_ns); - - // Add durations to infer stats for a successful inference request. - void UpdateSuccess( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t request_start_ns, const uint64_t queue_start_ns, - const uint64_t compute_start_ns, const uint64_t compute_input_end_ns, - const uint64_t compute_output_start_ns, const uint64_t compute_end_ns, - const uint64_t request_end_ns); - - // Add durations to infer stats for a successful inference request. - void UpdateSuccessWithDuration( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t request_start_ns, const uint64_t queue_start_ns, - const uint64_t compute_start_ns, const uint64_t request_end_ns, - const uint64_t compute_input_duration_ns, - const uint64_t compute_infer_duration_ns, - const uint64_t compute_output_duration_ns); - - // Add durations to infer stats for a successful cached response. - void UpdateSuccessCacheHit( - MetricModelReporter* metric_reporter, const size_t batch_size, - const uint64_t request_start_ns, const uint64_t queue_start_ns, - const uint64_t cache_lookup_start_ns, const uint64_t request_end_ns, - const uint64_t cache_hit_lookup_duration_ns); - - // Add durations to infer stats for a cache miss and update request duration - // to account for cache insertion after backend computes the response. 
-  void UpdateSuccessCacheMiss(
-      MetricModelReporter* metric_reporter,
-      const uint64_t cache_miss_lookup_duration_ns,
-      const uint64_t cache_miss_insertion_duration_ns);
-
-  // Add durations to batch infer stats for a batch execution.
-  // 'success_request_count' is the number of success requests in the
-  // batch that have infer_stats attached.
-  void UpdateInferBatchStats(
-      MetricModelReporter* metric_reporter, const size_t batch_size,
-      const uint64_t compute_start_ns, const uint64_t compute_input_end_ns,
-      const uint64_t compute_output_start_ns, const uint64_t compute_end_ns);
-
-  // Add durations to batch infer stats for a batch execution.
-  // 'success_request_count' is the number of success requests in the
-  // batch that have infer_stats attached.
-  void UpdateInferBatchStatsWithDuration(
-      MetricModelReporter* metric_reporter, size_t batch_size,
-      const uint64_t compute_input_duration_ns,
-      const uint64_t compute_infer_duration_ns,
-      const uint64_t compute_output_duration_ns);
-
- private:
-  std::mutex mu_;
-  uint64_t last_inference_ms_;
-  uint64_t inference_count_;
-  uint64_t execution_count_;
-  InferStats infer_stats_;
-  std::map<size_t, InferBatchStats> batch_stats_;
-#endif  // TRITON_ENABLE_STATS
-};
-
-
-//
-// Macros to set infer stats.
-//
-#ifdef TRITON_ENABLE_STATS
-#define INFER_STATS_SET_TIMESTAMP(TS_NS)                              \
-  {                                                                   \
-    TS_NS = std::chrono::duration_cast<std::chrono::nanoseconds>(     \
-                std::chrono::steady_clock::now().time_since_epoch()) \
-                .count();                                             \
-  }
-#define INFER_STATS_DECL_TIMESTAMP(TS_NS) \
-  uint64_t TS_NS;                         \
-  INFER_STATS_SET_TIMESTAMP(TS_NS);
-#else
-#define INFER_STATS_DECL_TIMESTAMP(TS_NS)
-#define INFER_STATS_SET_TIMESTAMP(TS_NS)
-#endif  // TRITON_ENABLE_STATS
-
-}}  // namespace triton::core
diff --git a/3rdparty/core-r22.12/src/infer_trace.cc b/3rdparty/core-r22.12/src/infer_trace.cc
deleted file mode 100644
index cce46e26283188b7afef2ca2296d206cbf2791a5..0000000000000000000000000000000000000000
--- a/3rdparty/core-r22.12/src/infer_trace.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
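The `INFER_STATS_DECL_TIMESTAMP` / `INFER_STATS_SET_TIMESTAMP` macros at the end of the removed `infer_stats.h` above capture a steady-clock timestamp in nanoseconds and compile away entirely when `TRITON_ENABLE_STATS` is not defined. A standalone sketch of the same pattern is below; it uses renamed macros and does not depend on the Triton headers.

```cpp
#include <chrono>
#include <cstdint>
#include <iostream>
#include <thread>

// Standalone equivalents of the macros above; real code would reuse the
// header's definitions instead of redefining them. When STATS_ENABLED is 0
// the macros expand to nothing, just like the TRITON_ENABLE_STATS guard.
#define STATS_ENABLED 1

#if STATS_ENABLED
#define STATS_SET_TIMESTAMP(TS_NS)                                    \
  {                                                                   \
    TS_NS = std::chrono::duration_cast<std::chrono::nanoseconds>(     \
                std::chrono::steady_clock::now().time_since_epoch()) \
                .count();                                             \
  }
#define STATS_DECL_TIMESTAMP(TS_NS) \
  uint64_t TS_NS;                   \
  STATS_SET_TIMESTAMP(TS_NS);
#else
#define STATS_DECL_TIMESTAMP(TS_NS)
#define STATS_SET_TIMESTAMP(TS_NS)
#endif

int main() {
  STATS_DECL_TIMESTAMP(start_ns);  // declares start_ns and captures "now"
  std::this_thread::sleep_for(std::chrono::milliseconds(5));
  STATS_DECL_TIMESTAMP(end_ns);
  std::cout << "elapsed: " << (end_ns - start_ns) << " ns\n";
}
```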
- -#include "infer_trace.h" - -namespace triton { namespace core { - -#ifdef TRITON_ENABLE_TRACING - -// Start the trace id at 1, because id 0 is reserved to indicate no -// parent. -std::atomic InferenceTrace::next_id_(1); - -InferenceTrace* -InferenceTrace::SpawnChildTrace() -{ - InferenceTrace* trace = new InferenceTrace( - level_, id_, activity_fn_, tensor_activity_fn_, release_fn_, userp_); - return trace; -} - -void -InferenceTrace::Release() -{ - release_fn_(reinterpret_cast(this), userp_); -} - -std::shared_ptr -InferenceTraceProxy::SpawnChildTrace() -{ - std::shared_ptr strace_proxy = - std::make_shared(trace_->SpawnChildTrace()); - return strace_proxy; -} - -#endif // TRITON_ENABLE_TRACING - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/infer_trace.h b/3rdparty/core-r22.12/src/infer_trace.h deleted file mode 100644 index f2696c24a2f09ea9cad268981a47b5cddda71f61..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/infer_trace.h +++ /dev/null @@ -1,205 +0,0 @@ -// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include "constants.h" -#include "status.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -#ifdef TRITON_ENABLE_TRACING - -// -// InferenceTrace -// -// Interface to TRITONSERVER_InferenceTrace to report trace events. 
-// -class InferenceTrace { - public: - InferenceTrace( - const TRITONSERVER_InferenceTraceLevel level, const uint64_t parent_id, - TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* userp) - : level_(level), id_(next_id_++), parent_id_(parent_id), - activity_fn_(activity_fn), tensor_activity_fn_(tensor_activity_fn), - release_fn_(release_fn), userp_(userp) - { - } - - InferenceTrace* SpawnChildTrace(); - - int64_t Id() const { return id_; } - int64_t ParentId() const { return parent_id_; } - - const std::string& ModelName() const { return model_name_; } - int64_t ModelVersion() const { return model_version_; } - - void SetModelName(const std::string& n) { model_name_ = n; } - void SetModelVersion(int64_t v) { model_version_ = v; } - - // Report trace activity. - void Report( - const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns) - { - if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) { - activity_fn_( - reinterpret_cast(this), activity, - timestamp_ns, userp_); - } - } - - // Report trace activity at the current time. - void ReportNow(const TRITONSERVER_InferenceTraceActivity activity) - { - if ((level_ & TRITONSERVER_TRACE_LEVEL_TIMESTAMPS) > 0) { - Report( - activity, std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count()); - } - } - - // Report tensor trace activity. - void ReportTensor( - const TRITONSERVER_InferenceTraceActivity activity, const char* name, - TRITONSERVER_DataType datatype, const void* base, size_t byte_size, - const int64_t* shape, uint64_t dim_count, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) - { - if ((level_ & TRITONSERVER_TRACE_LEVEL_TENSORS) > 0) { - tensor_activity_fn_( - reinterpret_cast(this), activity, name, - datatype, base, byte_size, shape, dim_count, memory_type, - memory_type_id, userp_); - } - } - - // Release the trace. Call the trace release callback. - void Release(); - - private: - const TRITONSERVER_InferenceTraceLevel level_; - const uint64_t id_; - const uint64_t parent_id_; - - TRITONSERVER_InferenceTraceActivityFn_t activity_fn_; - TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn_; - TRITONSERVER_InferenceTraceReleaseFn_t release_fn_; - void* userp_; - - std::string model_name_; - int64_t model_version_; - - // Maintain next id statically so that trace id is unique even - // across traces - static std::atomic next_id_; -}; - -// -// InferenceTraceProxy -// -// Object attached as shared_ptr to InferenceRequest and -// InferenceResponse(s) being traced as part of a single inference -// request. 
-// -class InferenceTraceProxy { - public: - InferenceTraceProxy(InferenceTrace* trace) : trace_(trace) {} - ~InferenceTraceProxy() { trace_->Release(); } - int64_t Id() const { return trace_->Id(); } - int64_t ParentId() const { return trace_->ParentId(); } - const std::string& ModelName() const { return trace_->ModelName(); } - int64_t ModelVersion() const { return trace_->ModelVersion(); } - void SetModelName(const std::string& n) { trace_->SetModelName(n); } - void SetModelVersion(int64_t v) { trace_->SetModelVersion(v); } - - void Report( - const TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns) - { - trace_->Report(activity, timestamp_ns); - } - - void ReportNow(const TRITONSERVER_InferenceTraceActivity activity) - { - trace_->ReportNow(activity); - } - - void ReportTensor( - const TRITONSERVER_InferenceTraceActivity activity, const char* name, - TRITONSERVER_DataType datatype, const void* base, size_t byte_size, - const int64_t* shape, uint64_t dim_count, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) - { - trace_->ReportTensor( - activity, name, datatype, base, byte_size, shape, dim_count, - memory_type, memory_type_id); - } - - std::shared_ptr SpawnChildTrace(); - - private: - InferenceTrace* trace_; -}; - -#endif // TRITON_ENABLE_TRACING - -// -// Macros to generate trace activity -// -#ifdef TRITON_ENABLE_TRACING -#define INFER_TRACE_ACTIVITY(T, A, TS_NS) \ - { \ - const auto& trace = (T); \ - const auto ts_ns = (TS_NS); \ - if (trace != nullptr) { \ - trace->Report(A, ts_ns); \ - } \ - } -#define INFER_TRACE_ACTIVITY_NOW(T, A) \ - { \ - const auto& trace = (T); \ - if (trace != nullptr) { \ - trace->ReportNow(A); \ - } \ - } -#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI) \ - { \ - const auto& trace = (T); \ - if (trace != nullptr) { \ - trace->ReportTensor(A, N, D, BA, BY, S, DI, MT, MTI); \ - } \ - } -#else -#define INFER_TRACE_ACTIVITY(T, A, TS_NS) -#define INFER_TRACE_ACTIVITY_NOW(T, A) -#define INFER_TRACE_TENSOR_ACTIVITY(T, A, N, D, BA, BY, S, DI, MT, MTI) -#endif // TRITON_ENABLE_TRACING -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/instance_queue.cc b/3rdparty/core-r22.12/src/instance_queue.cc deleted file mode 100644 index 0aaccb7183ff7e676e8d7b99ca72a5973c380ee0..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/instance_queue.cc +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "instance_queue.h" - -#include "triton/common/logging.h" - -namespace triton { namespace core { - -InstanceQueue::InstanceQueue(size_t max_batch_size, uint64_t max_queue_delay_ns) - : max_batch_size_(max_batch_size), max_queue_delay_ns_(max_queue_delay_ns) -{ -} - -size_t -InstanceQueue::Size() -{ - return payload_queue_.size(); -} - -bool -InstanceQueue::Empty() -{ - return payload_queue_.empty(); -} - -void -InstanceQueue::Enqueue(const std::shared_ptr& payload) -{ - payload_queue_.push_back(payload); -} - -void -InstanceQueue::Dequeue( - std::shared_ptr* payload, - std::vector>* merged_payloads) -{ - *payload = payload_queue_.front(); - payload_queue_.pop_front(); - { - std::lock_guard exec_lock(*((*payload)->GetExecMutex())); - (*payload)->SetState(Payload::State::EXECUTING); - if ((!payload_queue_.empty()) && (max_queue_delay_ns_ > 0) && - (max_batch_size_ > 1) && (!(*payload)->IsSaturated())) { - bool continue_merge; - do { - continue_merge = false; - uint64_t now_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - size_t batch_size = (*payload)->BatchSize(); - if ((!payload_queue_.empty()) && - (!payload_queue_.front()->IsSaturated()) && - (now_ns - payload_queue_.front()->BatcherStartNs()) > - max_queue_delay_ns_) { - std::lock_guard exec_lock( - *(payload_queue_.front()->GetExecMutex())); - payload_queue_.front()->SetState(Payload::State::EXECUTING); - size_t front_batch_size = payload_queue_.front()->BatchSize(); - if ((batch_size + front_batch_size) <= max_batch_size_) { - const auto& status = - (*payload)->MergePayload(payload_queue_.front()); - if (status.IsOk()) { - merged_payloads->push_back(payload_queue_.front()); - payload_queue_.pop_front(); - continue_merge = true; - } - } - } - } while (continue_merge); - } - } -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/instance_queue.h b/3rdparty/core-r22.12/src/instance_queue.h deleted file mode 100644 index da25a460c0aa1c963c5c8141923e4a9a2628329a..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/instance_queue.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "payload.h" - -namespace triton { namespace core { - -// -// InstanceQueue -// -// A queue implementation holding Payloads ready to be scheduled on -// model instance. -class InstanceQueue { - public: - explicit InstanceQueue(size_t max_batch_size, uint64_t max_queue_delay_ns); - - size_t Size(); - bool Empty(); - void Enqueue(const std::shared_ptr& payload); - void Dequeue( - std::shared_ptr* payload, - std::vector>* merged_payloads); - - private: - size_t max_batch_size_; - uint64_t max_queue_delay_ns_; - - std::deque> payload_queue_; - std::shared_ptr staged_payload_; - std::mutex mu_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/label_provider.cc b/3rdparty/core-r22.12/src/label_provider.cc deleted file mode 100644 index cff489453f3f69434b2038e346f15290eb0cc19e..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/label_provider.cc +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
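For context on the queue just removed: `InstanceQueue::Dequeue` above pops the front payload and then opportunistically merges further queued payloads that have waited longer than the max queue delay, as long as the combined batch size still fits under the max batch size. The sketch below models only that merge condition with a toy `Payload` struct; it omits the locking, payload state transitions, and saturation checks of the real implementation.

```cpp
#include <cstdint>
#include <deque>
#include <iostream>
#include <vector>

// Toy payload: just a batch size and the time it was enqueued.
struct Payload {
  size_t batch_size;
  uint64_t enqueue_ns;
};

// Take the front payload, then fold in further payloads that have exceeded
// the queue-delay threshold while the merged batch still fits.
std::vector<Payload> DequeueWithMerge(
    std::deque<Payload>& queue, size_t max_batch_size,
    uint64_t max_queue_delay_ns, uint64_t now_ns) {
  std::vector<Payload> merged;
  merged.push_back(queue.front());
  queue.pop_front();
  size_t batch = merged.front().batch_size;

  while (!queue.empty() &&
         (now_ns - queue.front().enqueue_ns) > max_queue_delay_ns &&
         (batch + queue.front().batch_size) <= max_batch_size) {
    batch += queue.front().batch_size;
    merged.push_back(queue.front());
    queue.pop_front();
  }
  return merged;
}

int main() {
  std::deque<Payload> q = {{2, 100}, {3, 120}, {4, 900}};
  // With a 500ns delay threshold at t=1000, the first two payloads (waiting
  // 900ns and 880ns) merge into a batch of 5; the third (100ns old) stays.
  auto merged = DequeueWithMerge(q, 8, 500, 1000);
  std::cout << "merged payloads: " << merged.size()
            << ", left in queue: " << q.size() << "\n";  // 2 and 1
}
```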
- -#include "label_provider.h" - -#include -#include -#include -#include "filesystem.h" - -namespace triton { namespace core { - -const std::string& -LabelProvider::GetLabel(const std::string& name, size_t index) const -{ - static const std::string not_found; - - auto itr = label_map_.find(name); - if (itr == label_map_.end()) { - return not_found; - } - - if (itr->second.size() <= index) { - return not_found; - } - - return itr->second[index]; -} - -Status -LabelProvider::AddLabels(const std::string& name, const std::string& filepath) -{ - std::string label_file_contents; - RETURN_IF_ERROR(ReadTextFile(filepath, &label_file_contents)); - - auto p = label_map_.insert(std::make_pair(name, std::vector())); - if (!p.second) { - return Status( - Status::Code::INTERNAL, "multiple label files for '" + name + "'"); - } - - auto itr = p.first; - - std::istringstream label_file_stream(label_file_contents); - std::string line; - while (std::getline(label_file_stream, line)) { - itr->second.push_back(line); - } - - return Status::Success; -} - -const std::vector& -LabelProvider::GetLabels(const std::string& name) -{ - static const std::vector not_found; - auto itr = label_map_.find(name); - if (itr == label_map_.end()) { - return not_found; - } - return itr->second; -} - -Status -LabelProvider::AddLabels( - const std::string& name, const std::vector& labels) -{ - label_map_.emplace(name, labels); - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/label_provider.h b/3rdparty/core-r22.12/src/label_provider.h deleted file mode 100644 index ebbd1894772ab6e4169d6b1f6717c72239d9f4e8..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/label_provider.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include "constants.h" -#include "status.h" - -namespace triton { namespace core { - -// Provides classification labels. 
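// A short usage sketch for the LabelProvider declared below, assuming a
// hypothetical label file whose first line is the index-0 label, the second
// line the index-1 label, and so on (the format AddLabels() parses in
// label_provider.cc above). Error handling is elided.
//
//   triton::core::LabelProvider provider;
//   provider.AddLabels("output0", "/path/to/labels.txt");
//   // Returns line 0 of the file, or "" if the name or index is unknown.
//   const std::string& top1 = provider.GetLabel("output0", 0);
//
//   // Labels can also be supplied directly, without a file:
//   provider.AddLabels("output1", std::vector<std::string>{"cat", "dog"});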
-class LabelProvider { - public: - LabelProvider() = default; - - // Return the label associated with 'name' for a given - // 'index'. Return empty string if no label is available. - const std::string& GetLabel(const std::string& name, size_t index) const; - - // Associate with 'name' a set of labels initialized from a given - // 'filepath'. Within the file each label is specified on its own - // line. The first label (line 0) is the index-0 label, the second - // label (line 1) is the index-1 label, etc. - Status AddLabels(const std::string& name, const std::string& filepath); - - // Return the labels associated with 'name'. Return empty vector if no labels - // are available. - const std::vector& GetLabels(const std::string& name); - - // Associate with 'name' a set of 'labels' - Status AddLabels( - const std::string& name, const std::vector& labels); - - private: - DISALLOW_COPY_AND_ASSIGN(LabelProvider); - - std::unordered_map> label_map_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/libtritonserver.ldscript b/3rdparty/core-r22.12/src/libtritonserver.ldscript deleted file mode 100644 index 055d5df1980898e0a7870d39c494dbcc43e878f1..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/libtritonserver.ldscript +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -{ - global: - TRITONSERVER_*; - TRITONBACKEND_*; - TRITONREPOAGENT_*; - local: *; -}; diff --git a/3rdparty/core-r22.12/src/memory.cc b/3rdparty/core-r22.12/src/memory.cc deleted file mode 100644 index 7d44f4b7114d25fe3cef7d22e12e9baf076c862f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/memory.cc +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "memory.h" - -#include "pinned_memory_manager.h" -#include "triton/common/logging.h" - -#ifdef TRITON_ENABLE_GPU -#include -#include "cuda_memory_manager.h" -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace core { - -// -// MemoryReference -// -MemoryReference::MemoryReference() : Memory() {} - -const char* -MemoryReference::BufferAt( - size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) const -{ - if (idx >= buffer_.size()) { - *byte_size = 0; - *memory_type = TRITONSERVER_MEMORY_CPU; - *memory_type_id = 0; - return nullptr; - } - *memory_type = buffer_[idx].buffer_attributes_.MemoryType(); - *memory_type_id = buffer_[idx].buffer_attributes_.MemoryTypeId(); - *byte_size = buffer_[idx].buffer_attributes_.ByteSize(); - return buffer_[idx].buffer_; -} - -const char* -MemoryReference::BufferAt(size_t idx, BufferAttributes** buffer_attributes) -{ - if (idx >= buffer_.size()) { - *buffer_attributes = nullptr; - return nullptr; - } - - *buffer_attributes = &(buffer_[idx].buffer_attributes_); - return buffer_[idx].buffer_; -} - -size_t -MemoryReference::AddBuffer( - const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - total_byte_size_ += byte_size; - buffer_count_++; - buffer_.emplace_back(buffer, byte_size, memory_type, memory_type_id); - return buffer_.size() - 1; -} - -size_t -MemoryReference::AddBuffer( - const char* buffer, BufferAttributes* buffer_attributes) -{ - total_byte_size_ += buffer_attributes->ByteSize(); - buffer_count_++; - buffer_.emplace_back(buffer, buffer_attributes); - return buffer_.size() - 1; -} - -size_t -MemoryReference::AddBufferFront( - const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - total_byte_size_ += byte_size; - buffer_count_++; - buffer_.emplace( - buffer_.begin(), buffer, byte_size, memory_type, memory_type_id); - return buffer_.size() - 1; -} - -// -// MutableMemory -// -MutableMemory::MutableMemory( - char* buffer, 
size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) - : Memory(), buffer_(buffer), - buffer_attributes_( - BufferAttributes(byte_size, memory_type, memory_type_id, nullptr)) -{ - total_byte_size_ = byte_size; - buffer_count_ = (byte_size == 0) ? 0 : 1; -} - -const char* -MutableMemory::BufferAt( - size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) const -{ - if (idx != 0) { - *byte_size = 0; - *memory_type = TRITONSERVER_MEMORY_CPU; - *memory_type_id = 0; - return nullptr; - } - *byte_size = total_byte_size_; - *memory_type = buffer_attributes_.MemoryType(); - *memory_type_id = buffer_attributes_.MemoryTypeId(); - return buffer_; -} - -const char* -MutableMemory::BufferAt(size_t idx, BufferAttributes** buffer_attributes) -{ - if (idx != 0) { - *buffer_attributes = nullptr; - return nullptr; - } - - *buffer_attributes = &buffer_attributes_; - return buffer_; -} - -char* -MutableMemory::MutableBuffer( - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - if (memory_type != nullptr) { - *memory_type = buffer_attributes_.MemoryType(); - } - if (memory_type_id != nullptr) { - *memory_type_id = buffer_attributes_.MemoryTypeId(); - } - - return buffer_; -} - -// -// AllocatedMemory -// -AllocatedMemory::AllocatedMemory( - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) - : MutableMemory(nullptr, byte_size, memory_type, memory_type_id) -{ - if (total_byte_size_ != 0) { - // Allocate memory with the following fallback policy: - // CUDA memory -> pinned system memory -> non-pinned system memory - switch (buffer_attributes_.MemoryType()) { -#ifdef TRITON_ENABLE_GPU - case TRITONSERVER_MEMORY_GPU: { - auto status = CudaMemoryManager::Alloc( - (void**)&buffer_, total_byte_size_, - buffer_attributes_.MemoryTypeId()); - if (!status.IsOk()) { - static bool warning_logged = false; - if (!warning_logged) { - LOG_WARNING << status.Message() - << ", falling back to pinned system memory"; - warning_logged = true; - } - - goto pinned_memory_allocation; - } - break; - } - pinned_memory_allocation: -#endif // TRITON_ENABLE_GPU - default: { - TRITONSERVER_MemoryType memory_type = buffer_attributes_.MemoryType(); - auto status = PinnedMemoryManager::Alloc( - (void**)&buffer_, total_byte_size_, &memory_type, true); - buffer_attributes_.SetMemoryType(memory_type); - if (!status.IsOk()) { - LOG_ERROR << status.Message(); - buffer_ = nullptr; - } - break; - } - } - } - total_byte_size_ = (buffer_ == nullptr) ? 0 : total_byte_size_; -} - -AllocatedMemory::~AllocatedMemory() -{ - if (buffer_ != nullptr) { - switch (buffer_attributes_.MemoryType()) { - case TRITONSERVER_MEMORY_GPU: { -#ifdef TRITON_ENABLE_GPU - auto status = - CudaMemoryManager::Free(buffer_, buffer_attributes_.MemoryTypeId()); - if (!status.IsOk()) { - LOG_ERROR << status.Message(); - } -#endif // TRITON_ENABLE_GPU - break; - } - - default: { - auto status = PinnedMemoryManager::Free(buffer_); - if (!status.IsOk()) { - LOG_ERROR << status.Message(); - buffer_ = nullptr; - } - break; - } - } - buffer_ = nullptr; - } -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/memory.h b/3rdparty/core-r22.12/src/memory.h deleted file mode 100644 index fad58db09e3fbe7c33dac07034dd8c5689c280ce..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/memory.h +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "buffer_attributes.h" -#include "constants.h" -#include "status.h" - -namespace triton { namespace core { - -// -// Memory used to access data in inference requests -// -class Memory { - public: - // Get the 'idx'-th data block in the buffer. Using index to avoid - // maintaining internal state such that one buffer can be shared - // across multiple providers. - // 'idx' zero base index. Valid indices are continuous. - // 'byte_size' returns the byte size of the chunk of bytes. - // 'memory_type' returns the memory type of the chunk of bytes. - // 'memory_type_id' returns the memory type id of the chunk of bytes. - // Return the pointer to the data block. Returns nullptr if 'idx' is - // out of range - virtual const char* BufferAt( - size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) const = 0; - - // Similar to the above BufferAt but with BufferAttributes. - virtual const char* BufferAt( - size_t idx, BufferAttributes** buffer_attributes) = 0; - - // Get the number of contiguous buffers composing the memory. 
- size_t BufferCount() const { return buffer_count_; } - - // Return the total byte size of the data buffer - size_t TotalByteSize() const { return total_byte_size_; } - - protected: - Memory() : total_byte_size_(0), buffer_count_(0) {} - size_t total_byte_size_; - size_t buffer_count_; -}; - -// -// MemoryReference -// -class MemoryReference : public Memory { - public: - // Create a read-only data buffer as a reference to other data buffer - MemoryReference(); - - //\see Memory::BufferAt() - const char* BufferAt( - size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) const override; - - const char* BufferAt( - size_t idx, BufferAttributes** buffer_attributes) override; - - // Add a 'buffer' with 'byte_size' as part of this data buffer - // Return the index of the buffer - size_t AddBuffer( - const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - - size_t AddBuffer(const char* buffer, BufferAttributes* buffer_attributes); - - // Add a 'buffer' with 'byte_size' as part of this data buffer in the front - // Return the index of the buffer - size_t AddBufferFront( - const char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - - private: - struct Block { - Block( - const char* buffer, size_t byte_size, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) - : buffer_(buffer), buffer_attributes_(BufferAttributes( - byte_size, memory_type, memory_type_id, nullptr)) - { - } - - Block(const char* buffer, BufferAttributes* buffer_attributes) - : buffer_(buffer), buffer_attributes_(*buffer_attributes) - { - } - const char* buffer_; - BufferAttributes buffer_attributes_; - }; - std::vector buffer_; -}; - -// -// MutableMemory -// -class MutableMemory : public Memory { - public: - // Create a mutable data buffer referencing to other data buffer. - MutableMemory( - char* buffer, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - - virtual ~MutableMemory() {} - - //\see Memory::BufferAt() - const char* BufferAt( - size_t idx, size_t* byte_size, TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) const override; - - //\see Memory::BufferAt() - const char* BufferAt( - size_t idx, BufferAttributes** buffer_attributes) override; - - // Return a pointer to the base address of the mutable buffer. If - // non-null 'memory_type' returns the memory type of the chunk of - // bytes. If non-null 'memory_type_id' returns the memory type id of - // the chunk of bytes. - char* MutableBuffer( - TRITONSERVER_MemoryType* memory_type = nullptr, - int64_t* memory_type_id = nullptr); - - DISALLOW_COPY_AND_ASSIGN(MutableMemory); - - protected: - MutableMemory() : Memory() {} - - char* buffer_; - BufferAttributes buffer_attributes_; -}; - -// -// AllocatedMemory -// -class AllocatedMemory : public MutableMemory { - public: - // Create a continuous data buffer with 'byte_size', 'memory_type' and - // 'memory_type_id'. Note that the buffer may be created on different memeory - // type and memory type id if the original request type and id can not be - // satisfied, thus the function caller should always check the actual memory - // type and memory type id before use. 
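// A brief caller-side sketch of the check described in the comment above:
// because the constructor may fall back from GPU memory to pinned or plain
// system memory (see AllocatedMemory's constructor in memory.cc), callers
// query the attributes that were actually used rather than assuming the
// requested ones. 'byte_size' stands for the caller's requested size; error
// handling is elided.
//
//   triton::core::AllocatedMemory mem(
//       byte_size, TRITONSERVER_MEMORY_GPU, 0 /* requested device */);
//   TRITONSERVER_MemoryType actual_type;
//   int64_t actual_id;
//   char* base = mem.MutableBuffer(&actual_type, &actual_id);
//   if (base == nullptr) {
//     // Allocation failed entirely; TotalByteSize() reports 0.
//   } else if (actual_type != TRITONSERVER_MEMORY_GPU) {
//     // Fallback happened: use a host-side copy path instead of a CUDA copy.
//   }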
- AllocatedMemory( - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id); - - ~AllocatedMemory() override; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/metric_family.cc b/3rdparty/core-r22.12/src/metric_family.cc deleted file mode 100644 index 4ae3a8174088f42f9cc892ac4997e7bde74e9ee3..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/metric_family.cc +++ /dev/null @@ -1,321 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifdef TRITON_ENABLE_METRICS - -#include "metric_family.h" -#include "metrics.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -// -// Implementation for TRITONSERVER_MetricFamily. 
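// Prometheus hands back the same underlying counter/gauge object whenever a
// family is asked for an identical label set, so several TRITONSERVER_Metric
// wrappers can end up sharing one prometheus metric. The Add()/Remove()
// methods below therefore keep a reference count per raw metric pointer and
// only ask the family to remove the metric when the last wrapper goes away.
// A compressed, standalone sketch of that pattern (void* stands in for the
// prometheus metric pointer; the real code also defers the family removal):
#include <cstddef>
#include <functional>
#include <mutex>
#include <unordered_map>

class RefCountedRemover {
 public:
  void Acquire(void* handle) {
    std::lock_guard<std::mutex> lk(mu_);
    ++ref_cnt_[handle];
  }
  // Invokes 'remove' only when the last reference to 'handle' is released.
  void Release(void* handle, const std::function<void(void*)>& remove) {
    std::lock_guard<std::mutex> lk(mu_);
    auto it = ref_cnt_.find(handle);
    if (it == ref_cnt_.end()) {
      return;
    }
    if (--it->second == 0) {
      ref_cnt_.erase(it);
      remove(handle);
    }
  }

 private:
  std::mutex mu_;
  std::unordered_map<void*, size_t> ref_cnt_;
};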
-// -MetricFamily::MetricFamily( - TRITONSERVER_MetricKind kind, const char* name, const char* description) -{ - auto registry = Metrics::GetRegistry(); - - switch (kind) { - case TRITONSERVER_METRIC_KIND_COUNTER: - family_ = reinterpret_cast(&prometheus::BuildCounter() - .Name(name) - .Help(description) - .Register(*registry)); - break; - case TRITONSERVER_METRIC_KIND_GAUGE: - family_ = reinterpret_cast(&prometheus::BuildGauge() - .Name(name) - .Help(description) - .Register(*registry)); - break; - default: - throw std::invalid_argument( - "Unsupported kind passed to MetricFamily constructor."); - } - - kind_ = kind; -} - -void* -MetricFamily::Add(std::map label_map, Metric* metric) -{ - void* prom_metric = nullptr; - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - auto counter_family_ptr = - reinterpret_cast*>(family_); - auto counter_ptr = &counter_family_ptr->Add(label_map); - prom_metric = reinterpret_cast(counter_ptr); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - auto gauge_family_ptr = - reinterpret_cast*>(family_); - auto gauge_ptr = &gauge_family_ptr->Add(label_map); - prom_metric = reinterpret_cast(gauge_ptr); - break; - } - default: - throw std::invalid_argument( - "Unsupported family kind passed to Metric constructor."); - } - - std::lock_guard lk(metric_mtx_); - ++prom_metric_ref_cnt_[prom_metric]; - child_metrics_.insert(metric); - return prom_metric; -} - -void -MetricFamily::Remove(void* prom_metric, Metric* metric) -{ - { - // Remove reference to dependent Metric object - std::lock_guard lk(metric_mtx_); - child_metrics_.erase(metric); - } - - if (prom_metric == nullptr) { - return; - } - - { - std::lock_guard lk(metric_mtx_); - const auto it = prom_metric_ref_cnt_.find(prom_metric); - if (it != prom_metric_ref_cnt_.end()) { - --it->second; - if (it->second == 0) { - prom_metric_ref_cnt_.erase(it); - } else { - // Done as it is not the last reference - return; - } - } - } - - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - auto counter_family_ptr = - reinterpret_cast*>(family_); - auto counter_ptr = reinterpret_cast(prom_metric); - counter_family_ptr->Remove(counter_ptr); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - auto gauge_family_ptr = - reinterpret_cast*>(family_); - auto gauge_ptr = reinterpret_cast(prom_metric); - gauge_family_ptr->Remove(gauge_ptr); - break; - } - default: - // Invalid kind should be caught in constructor - LOG_ERROR << "Unsupported kind in Metric destructor."; - break; - } -} - -void -MetricFamily::InvalidateReferences() -{ - std::lock_guard lk(metric_mtx_); - for (auto& metric : child_metrics_) { - if (metric != nullptr) { - metric->Invalidate(); - } - } - child_metrics_.clear(); -} - -MetricFamily::~MetricFamily() -{ - if (NumMetrics() > 0) { - LOG_WARNING << "MetricFamily was deleted before its child Metrics, this " - "should not happen. Make sure to delete all child Metrics " - "before deleting their MetricFamily."; - } - InvalidateReferences(); - // DLIS-4072: Support for removing metric families from registry -} - -// -// Implementation for TRITONSERVER_Metric. 
-// -Metric::Metric( - TRITONSERVER_MetricFamily* family, - std::vector labels) -{ - family_ = reinterpret_cast(family); - kind_ = family_->Kind(); - - // Create map of labels from InferenceParameters - std::map label_map; - for (const auto& param : labels) { - if (param->Type() != TRITONSERVER_PARAMETER_STRING) { - throw std::invalid_argument( - "Parameter [" + param->Name() + - "] must have a type of TRITONSERVER_PARAMETER_STRING to be " - "added as a label."); - } - - label_map[param->Name()] = - std::string(reinterpret_cast(param->ValuePointer())); - } - - metric_ = family_->Add(label_map, this); -} - -Metric::~Metric() -{ - if (family_ != nullptr) { - family_->Remove(metric_, this); - } else { - LOG_WARNING << "Corresponding MetricFamily was deleted before this Metric, " - "this should not happen. Make sure to delete a Metric " - "before deleting its MetricFamily."; - } - // Catch lifetime management / invalid reference issues - Invalidate(); -} - -void -Metric::Invalidate() -{ - family_ = nullptr; - metric_ = nullptr; -} - -TRITONSERVER_Error* -Metric::Value(double* value) -{ - if (metric_ == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "Could not get metric value. Metric has been invalidated."); - } - - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - auto counter_ptr = reinterpret_cast(metric_); - LOG_VERBOSE(1) << "SETTING COUNTER METRIC FROM: " << *value << " to " - << counter_ptr->Value(); - *value = counter_ptr->Value(); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - auto gauge_ptr = reinterpret_cast(metric_); - LOG_VERBOSE(1) << "SETTING GAUGE METRIC FROM: " << *value << " to " - << gauge_ptr->Value(); - *value = gauge_ptr->Value(); - break; - } - default: - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "Unsupported TRITONSERVER_MetricKind"); - } - - return nullptr; // Success -} - -TRITONSERVER_Error* -Metric::Increment(double value) -{ - if (metric_ == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "Could not increment metric value. Metric has been invalidated."); - } - - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - if (value < 0.0) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "TRITONSERVER_METRIC_KIND_COUNTER can only be incremented " - "monotonically by non-negative values."); - } - - auto counter_ptr = reinterpret_cast(metric_); - counter_ptr->Increment(value); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - auto gauge_ptr = reinterpret_cast(metric_); - // Gauge::Increment works for both positive and negative values as of - // prometheus-cpp v1.0 but for now on v0.7 we defer call to - // Increment/Decrement based on the sign of value - // https://github.com/jupp0r/prometheus-cpp/blob/master/core/src/gauge.cc - if (value < 0.0) { - gauge_ptr->Decrement(-1.0 * value); - } else { - gauge_ptr->Increment(value); - } - break; - } - default: - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "Unsupported TRITONSERVER_MetricKind"); - } - - return nullptr; // Success -} - -TRITONSERVER_Error* -Metric::Set(double value) -{ - if (metric_ == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - "Could not set metric value. 
Metric has been invalidated."); - } - - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "TRITONSERVER_METRIC_KIND_COUNTER does not support Set"); - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - auto gauge_ptr = reinterpret_cast(metric_); - gauge_ptr->Set(value); - break; - } - default: - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "Unsupported TRITONSERVER_MetricKind"); - } - - return nullptr; // Success -} - -}} // namespace triton::core - -#endif // TRITON_ENABLE_METRICS diff --git a/3rdparty/core-r22.12/src/metric_family.h b/3rdparty/core-r22.12/src/metric_family.h deleted file mode 100644 index b5d09d864cf30a90455357b9f42ddf3b11835ad0..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/metric_family.h +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#ifdef TRITON_ENABLE_METRICS - -#include -#include -#include - -#include "infer_parameter.h" -#include "prometheus/registry.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -// -// Implementation for TRITONSERVER_MetricFamily. -// -class Metric; -class MetricFamily { - public: - MetricFamily( - TRITONSERVER_MetricKind kind, const char* name, const char* description); - ~MetricFamily(); - - void* Family() const { return family_; } - TRITONSERVER_MetricKind Kind() const { return kind_; } - - void* Add(std::map label_map, Metric* metric); - void Remove(void* prom_metric, Metric* metric); - - int NumMetrics() - { - std::lock_guard lk(metric_mtx_); - return child_metrics_.size(); - } - - private: - // If a MetricFamily is deleted before its dependent Metric, we want to - // invalidate the reference so we don't access invalid memory. 
- void InvalidateReferences(); - - void* family_; - TRITONSERVER_MetricKind kind_; - // Synchronize access of related metric objects - std::mutex metric_mtx_; - // Prometheus returns the existing metric pointer if the metric with the same - // set of labels are requested, as a result, different Metric objects may - // refer to the same prometheus metric. So we must track the reference count - // of the metric and request prometheus to remove it only when all references - // are released. - std::unordered_map prom_metric_ref_cnt_; - // Maintain references to metrics created from this metric family to - // invalidate their references if a family is deleted before its metric - std::set child_metrics_; -}; - -// -// Implementation for TRITONSERVER_Metric. -// -class Metric { - public: - Metric( - TRITONSERVER_MetricFamily* family, - std::vector labels); - ~Metric(); - - MetricFamily* Family() const { return family_; } - TRITONSERVER_MetricKind Kind() const { return kind_; } - - TRITONSERVER_Error* Value(double* value); - TRITONSERVER_Error* Increment(double value); - TRITONSERVER_Error* Set(double value); - - // If a MetricFamily is deleted before its dependent Metric, we want to - // invalidate the references so we don't access invalid memory. - void Invalidate(); - - private: - void* metric_; - MetricFamily* family_; - TRITONSERVER_MetricKind kind_; -}; - -}} // namespace triton::core - -#endif // TRITON_ENABLE_METRICS diff --git a/3rdparty/core-r22.12/src/metric_model_reporter.cc b/3rdparty/core-r22.12/src/metric_model_reporter.cc deleted file mode 100644 index 5f0905ef7331b23bdba95009f2dea16595c7077c..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/metric_model_reporter.cc +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
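// The Create() factory in this file de-duplicates reporters: a static map
// keyed by the hash of the metric labels stores weak_ptrs, so model versions
// that share the same labels reuse one reporter, and an expired entry is
// simply replaced. A self-contained sketch of that caching shape (Reporter
// is an illustrative stand-in for MetricModelReporter):
#include <cstddef>
#include <memory>
#include <mutex>
#include <unordered_map>

struct Reporter {};  // stand-in for the real reporter type

std::shared_ptr<Reporter> GetOrCreateReporter(size_t label_hash) {
  static std::mutex mtx;
  static std::unordered_map<size_t, std::weak_ptr<Reporter>> cache;

  std::lock_guard<std::mutex> lock(mtx);
  auto it = cache.find(label_hash);
  if (it != cache.end()) {
    if (auto existing = it->second.lock()) {
      return existing;  // another holder still alive: reuse the reporter
    }
    cache.erase(it);  // expired entry: fall through and create a fresh one
  }
  auto created = std::make_shared<Reporter>();
  cache[label_hash] = created;
  return created;
}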
- -#include "metric_model_reporter.h" - -#ifdef TRITON_ENABLE_METRICS - -#include "constants.h" -#include "metrics.h" - -namespace triton { namespace core { - -Status -MetricModelReporter::Create( - const std::string& model_name, const int64_t model_version, - const int device, const triton::common::MetricTagsMap& model_tags, - std::shared_ptr* metric_model_reporter) -{ - static std::mutex mtx; - static std::unordered_map> - reporter_map; - - std::map labels; - GetMetricLabels(&labels, model_name, model_version, device, model_tags); - auto hash_labels = Metrics::HashLabels(labels); - - std::lock_guard lock(mtx); - - const auto& itr = reporter_map.find(hash_labels); - if (itr != reporter_map.end()) { - // Found in map. If the weak_ptr is still valid that means that - // there are other models using the reporter and we just reuse that - // same reporter. If the weak_ptr is not valid then we need to remove - // the weak_ptr from the map and create the reporter again. - *metric_model_reporter = itr->second.lock(); - if (*metric_model_reporter != nullptr) { - return Status::Success; - } - - reporter_map.erase(itr); - } - - metric_model_reporter->reset( - new MetricModelReporter(model_name, model_version, device, model_tags)); - reporter_map.insert({hash_labels, *metric_model_reporter}); - return Status::Success; -} - -MetricModelReporter::MetricModelReporter( - const std::string& model_name, const int64_t model_version, - const int device, const triton::common::MetricTagsMap& model_tags) -{ - std::map labels; - GetMetricLabels(&labels, model_name, model_version, device, model_tags); - - metric_inf_success_ = - CreateCounterMetric(Metrics::FamilyInferenceSuccess(), labels); - metric_inf_failure_ = - CreateCounterMetric(Metrics::FamilyInferenceFailure(), labels); - metric_inf_count_ = - CreateCounterMetric(Metrics::FamilyInferenceCount(), labels); - metric_inf_exec_count_ = - CreateCounterMetric(Metrics::FamilyInferenceExecutionCount(), labels); - metric_inf_request_duration_us_ = - CreateCounterMetric(Metrics::FamilyInferenceRequestDuration(), labels); - metric_inf_queue_duration_us_ = - CreateCounterMetric(Metrics::FamilyInferenceQueueDuration(), labels); - metric_inf_compute_input_duration_us_ = CreateCounterMetric( - Metrics::FamilyInferenceComputeInputDuration(), labels); - metric_inf_compute_infer_duration_us_ = CreateCounterMetric( - Metrics::FamilyInferenceComputeInferDuration(), labels); - metric_inf_compute_output_duration_us_ = CreateCounterMetric( - Metrics::FamilyInferenceComputeOutputDuration(), labels); - metric_cache_hit_count_ = - CreateCounterMetric(Metrics::FamilyCacheHitCount(), labels); - metric_cache_hit_lookup_duration_us_ = - CreateCounterMetric(Metrics::FamilyCacheHitLookupDuration(), labels); - metric_cache_miss_count_ = - CreateCounterMetric(Metrics::FamilyCacheMissCount(), labels); - metric_cache_miss_lookup_duration_us_ = - CreateCounterMetric(Metrics::FamilyCacheMissLookupDuration(), labels); - metric_cache_miss_insertion_duration_us_ = - CreateCounterMetric(Metrics::FamilyCacheMissInsertionDuration(), labels); -} - -MetricModelReporter::~MetricModelReporter() -{ - Metrics::FamilyInferenceSuccess().Remove(metric_inf_success_); - Metrics::FamilyInferenceFailure().Remove(metric_inf_failure_); - Metrics::FamilyInferenceCount().Remove(metric_inf_count_); - Metrics::FamilyInferenceExecutionCount().Remove(metric_inf_exec_count_); - Metrics::FamilyInferenceRequestDuration().Remove( - metric_inf_request_duration_us_); - 
Metrics::FamilyInferenceQueueDuration().Remove(metric_inf_queue_duration_us_); - Metrics::FamilyInferenceComputeInputDuration().Remove( - metric_inf_compute_input_duration_us_); - Metrics::FamilyInferenceComputeInferDuration().Remove( - metric_inf_compute_infer_duration_us_); - Metrics::FamilyInferenceComputeOutputDuration().Remove( - metric_inf_compute_output_duration_us_); - Metrics::FamilyCacheHitCount().Remove(metric_cache_hit_count_); - Metrics::FamilyCacheHitLookupDuration().Remove( - metric_cache_hit_lookup_duration_us_); - Metrics::FamilyCacheMissCount().Remove(metric_cache_miss_count_); - Metrics::FamilyCacheMissInsertionDuration().Remove( - metric_cache_miss_insertion_duration_us_); -} - -void -MetricModelReporter::GetMetricLabels( - std::map* labels, const std::string& model_name, - const int64_t model_version, const int device, - const triton::common::MetricTagsMap& model_tags) -{ - labels->insert(std::map::value_type( - std::string(kMetricsLabelModelName), model_name)); - labels->insert(std::map::value_type( - std::string(kMetricsLabelModelVersion), std::to_string(model_version))); - for (const auto& tag : model_tags) { - labels->insert(std::map::value_type( - "_" + tag.first, tag.second)); - } - - // 'device' can be < 0 to indicate that the GPU is not known. In - // that case use a metric that doesn't have the gpu_uuid label. - if (device >= 0) { - std::string uuid; - if (Metrics::UUIDForCudaDevice(device, &uuid)) { - labels->insert(std::map::value_type( - std::string(kMetricsLabelGpuUuid), uuid)); - } - } -} - -prometheus::Counter* -MetricModelReporter::CreateCounterMetric( - prometheus::Family& family, - const std::map& labels) -{ - return &family.Add(labels); -} - -}} // namespace triton::core - -#endif // TRITON_ENABLE_METRICS diff --git a/3rdparty/core-r22.12/src/metric_model_reporter.h b/3rdparty/core-r22.12/src/metric_model_reporter.h deleted file mode 100644 index 282152828fda4dc1df9932d42030dea93941120f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/metric_model_reporter.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "status.h" -#include "triton/common/model_config.h" - -#ifdef TRITON_ENABLE_METRICS -#include "prometheus/registry.h" -#endif // TRITON_ENABLE_METRICS - -namespace triton { namespace core { - -// -// Interface for a metric reporter for a given version of a model. -// -class MetricModelReporter { - public: -#ifdef TRITON_ENABLE_METRICS - static Status Create( - const std::string& model_name, const int64_t model_version, - const int device, const triton::common::MetricTagsMap& model_tags, - std::shared_ptr* metric_model_reporter); - - ~MetricModelReporter(); - - // Get a metric for the given model, version and GPU index. - prometheus::Counter& MetricInferenceSuccess() const - { - return *metric_inf_success_; - } - prometheus::Counter& MetricInferenceFailure() const - { - return *metric_inf_failure_; - } - prometheus::Counter& MetricInferenceCount() const - { - return *metric_inf_count_; - } - prometheus::Counter& MetricInferenceExecutionCount() const - { - return *metric_inf_exec_count_; - } - prometheus::Counter& MetricInferenceRequestDuration() const - { - return *metric_inf_request_duration_us_; - } - prometheus::Counter& MetricInferenceQueueDuration() const - { - return *metric_inf_queue_duration_us_; - } - prometheus::Counter& MetricInferenceComputeInputDuration() const - { - return *metric_inf_compute_input_duration_us_; - } - prometheus::Counter& MetricInferenceComputeInferDuration() const - { - return *metric_inf_compute_infer_duration_us_; - } - prometheus::Counter& MetricInferenceComputeOutputDuration() const - { - return *metric_inf_compute_output_duration_us_; - } - prometheus::Counter& MetricCacheHitCount() const - { - return *metric_cache_hit_count_; - } - prometheus::Counter& MetricCacheHitLookupDuration() const - { - return *metric_cache_hit_lookup_duration_us_; - } - prometheus::Counter& MetricCacheMissCount() const - { - return *metric_cache_miss_count_; - } - prometheus::Counter& MetricCacheMissLookupDuration() const - { - return *metric_cache_miss_lookup_duration_us_; - } - prometheus::Counter& MetricCacheMissInsertionDuration() const - { - return *metric_cache_miss_insertion_duration_us_; - } - - private: - MetricModelReporter( - const std::string& model_name, const int64_t model_version, - const int device, const triton::common::MetricTagsMap& model_tags); - - static void GetMetricLabels( - std::map* labels, const std::string& model_name, - const int64_t model_version, const int device, - const triton::common::MetricTagsMap& model_tags); - prometheus::Counter* CreateCounterMetric( - prometheus::Family& family, - const std::map& labels); - - prometheus::Counter* metric_inf_success_; - prometheus::Counter* metric_inf_failure_; - prometheus::Counter* metric_inf_count_; - prometheus::Counter* metric_inf_exec_count_; - prometheus::Counter* metric_inf_request_duration_us_; - prometheus::Counter* metric_inf_queue_duration_us_; - prometheus::Counter* metric_inf_compute_input_duration_us_; - prometheus::Counter* 
metric_inf_compute_infer_duration_us_; - prometheus::Counter* metric_inf_compute_output_duration_us_; - prometheus::Counter* metric_cache_hit_count_; - prometheus::Counter* metric_cache_hit_lookup_duration_us_; - prometheus::Counter* metric_cache_miss_count_; - prometheus::Counter* metric_cache_miss_lookup_duration_us_; - prometheus::Counter* metric_cache_miss_insertion_duration_us_; -#endif // TRITON_ENABLE_METRICS -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/metrics.cc b/3rdparty/core-r22.12/src/metrics.cc deleted file mode 100644 index 0be4907e9b7b000ba54b677133a9e8b3e6814dcf..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/metrics.cc +++ /dev/null @@ -1,1035 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
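// The Metrics singleton below wires every nv_* family onto one shared
// prometheus::Registry and renders them with a TextSerializer. A compressed
// sketch of that wiring with a single hypothetical family; the metric name
// and labels here are illustrative, not ones Triton actually exports, and
// the exact header layout can differ across prometheus-cpp versions.
#include <memory>
#include <string>

#include "prometheus/counter.h"
#include "prometheus/registry.h"
#include "prometheus/text_serializer.h"

std::string RenderOneFamily() {
  auto registry = std::make_shared<prometheus::Registry>();
  auto& family = prometheus::BuildCounter()
                     .Name("example_inference_count")
                     .Help("Illustrative counter family")
                     .Register(*registry);
  auto& counter = family.Add({{"model", "demo"}, {"version", "1"}});
  counter.Increment();

  prometheus::TextSerializer serializer;
  // Serialize everything collected from the registry in Prometheus text format.
  return serializer.Serialize(registry->Collect());
}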
-// - -#ifdef TRITON_ENABLE_METRICS - -#include "metrics.h" - -#include -#include "constants.h" -#include "prometheus/detail/utils.h" -#include "triton/common/logging.h" - -#ifdef TRITON_ENABLE_METRICS_GPU -#include -#include -#include -#include -#include -#endif // TRITON_ENABLE_METRICS_GPU - -namespace triton { namespace core { - -Metrics::Metrics() - : registry_(std::make_shared()), - serializer_(new prometheus::TextSerializer()), - inf_success_family_( - prometheus::BuildCounter() - .Name("nv_inference_request_success") - .Help("Number of successful inference requests, all batch sizes") - .Register(*registry_)), - inf_failure_family_( - prometheus::BuildCounter() - .Name("nv_inference_request_failure") - .Help("Number of failed inference requests, all batch sizes") - .Register(*registry_)), - inf_count_family_(prometheus::BuildCounter() - .Name("nv_inference_count") - .Help("Number of inferences performed (does not " - "include cached requests)") - .Register(*registry_)), - inf_count_exec_family_(prometheus::BuildCounter() - .Name("nv_inference_exec_count") - .Help("Number of model executions performed " - "(does not include cached requests)") - .Register(*registry_)), - inf_request_duration_us_family_( - prometheus::BuildCounter() - .Name("nv_inference_request_duration_us") - .Help("Cumulative inference request duration in microseconds " - "(includes cached requests)") - .Register(*registry_)), - inf_queue_duration_us_family_( - prometheus::BuildCounter() - .Name("nv_inference_queue_duration_us") - .Help("Cumulative inference queuing duration in microseconds " - "(includes cached requests)") - .Register(*registry_)), - inf_compute_input_duration_us_family_( - prometheus::BuildCounter() - .Name("nv_inference_compute_input_duration_us") - .Help("Cumulative compute input duration in microseconds (does " - "not include cached requests)") - .Register(*registry_)), - inf_compute_infer_duration_us_family_( - prometheus::BuildCounter() - .Name("nv_inference_compute_infer_duration_us") - .Help("Cumulative compute inference duration in microseconds " - "(does not include cached requests)") - .Register(*registry_)), - inf_compute_output_duration_us_family_( - prometheus::BuildCounter() - .Name("nv_inference_compute_output_duration_us") - .Help("Cumulative inference compute output duration in " - "microseconds (does not include cached requests)") - .Register(*registry_)), - cache_num_entries_family_( - prometheus::BuildGauge() - .Name("nv_cache_num_entries") - .Help("Number of responses stored in response cache") - .Register(*registry_)), - cache_num_lookups_family_( - prometheus::BuildGauge() - .Name("nv_cache_num_lookups") - .Help("Number of cache lookups in response cache") - .Register(*registry_)), - cache_num_hits_family_(prometheus::BuildGauge() - .Name("nv_cache_num_hits") - .Help("Number of cache hits in response cache") - .Register(*registry_)), - cache_num_misses_family_( - prometheus::BuildGauge() - .Name("nv_cache_num_misses") - .Help("Number of cache misses in response cache") - .Register(*registry_)), - cache_num_evictions_family_( - prometheus::BuildGauge() - .Name("nv_cache_num_evictions") - .Help("Number of cache evictions in response cache") - .Register(*registry_)), - cache_lookup_duration_us_family_( - prometheus::BuildGauge() - .Name("nv_cache_lookup_duration") - .Help( - "Total cache lookup duration (hit and miss), in microseconds") - .Register(*registry_)), - cache_insertion_duration_us_family_( - prometheus::BuildGauge() - .Name("nv_cache_insertion_duration") - 
.Help("Total cache insertion duration, in microseconds") - .Register(*registry_)), - cache_util_family_(prometheus::BuildGauge() - .Name("nv_cache_util") - .Help("Cache utilization [0.0 - 1.0]") - .Register(*registry_)), - // Per-model cache metric families - cache_num_hits_model_family_(prometheus::BuildCounter() - .Name("nv_cache_num_hits_per_model") - .Help("Number of cache hits per model") - .Register(*registry_)), - cache_hit_lookup_duration_us_model_family_( - prometheus::BuildCounter() - .Name("nv_cache_hit_lookup_duration_per_model") - .Help( - "Total cache hit lookup duration per model, in microseconds") - .Register(*registry_)), - cache_num_misses_model_family_( - prometheus::BuildCounter() - .Name("nv_cache_num_misses_per_model") - .Help("Number of cache misses per model") - .Register(*registry_)), - cache_miss_lookup_duration_us_model_family_( - prometheus::BuildCounter() - .Name("nv_cache_miss_lookup_duration_per_model") - .Help( - "Total cache miss lookup duration per model, in microseconds") - .Register(*registry_)), - cache_miss_insertion_duration_us_model_family_( - prometheus::BuildCounter() - .Name("nv_cache_miss_insertion_duration_per_model") - .Help("Total cache miss insertion duration per model, in " - "microseconds") - .Register(*registry_)), - -#ifdef TRITON_ENABLE_METRICS_GPU - gpu_utilization_family_(prometheus::BuildGauge() - .Name("nv_gpu_utilization") - .Help("GPU utilization rate [0.0 - 1.0)") - .Register(*registry_)), - gpu_memory_total_family_(prometheus::BuildGauge() - .Name("nv_gpu_memory_total_bytes") - .Help("GPU total memory, in bytes") - .Register(*registry_)), - gpu_memory_used_family_(prometheus::BuildGauge() - .Name("nv_gpu_memory_used_bytes") - .Help("GPU used memory, in bytes") - .Register(*registry_)), - gpu_power_usage_family_(prometheus::BuildGauge() - .Name("nv_gpu_power_usage") - .Help("GPU power usage in watts") - .Register(*registry_)), - gpu_power_limit_family_(prometheus::BuildGauge() - .Name("nv_gpu_power_limit") - .Help("GPU power management limit in watts") - .Register(*registry_)), - gpu_energy_consumption_family_( - prometheus::BuildCounter() - .Name("nv_energy_consumption") - .Help("GPU energy consumption in joules since the Triton Server " - "started") - .Register(*registry_)), -#endif // TRITON_ENABLE_METRICS_GPU - -#ifdef TRITON_ENABLE_METRICS_CPU - cpu_utilization_family_(prometheus::BuildGauge() - .Name("nv_cpu_utilization") - .Help("CPU utilization rate [0.0 - 1.0]") - .Register(*registry_)), - cpu_memory_total_family_(prometheus::BuildGauge() - .Name("nv_cpu_memory_total_bytes") - .Help("CPU total memory (RAM), in bytes") - .Register(*registry_)), - cpu_memory_used_family_(prometheus::BuildGauge() - .Name("nv_cpu_memory_used_bytes") - .Help("CPU used memory (RAM), in bytes") - .Register(*registry_)), -#endif // TRITON_ENABLE_METRICS_CPU - - metrics_enabled_(false), gpu_metrics_enabled_(false), - cpu_metrics_enabled_(false), cache_metrics_enabled_(false), - metrics_interval_ms_(2000) -{ -} - -static prometheus::detail::LabelHasher label_hasher_; - -size_t -Metrics::HashLabels(const std::map& labels) -{ - return label_hasher_(labels); -} - -Metrics::~Metrics() -{ - // Signal the cache thread to exit and then wait for it... 
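// The destructor here and StartPollingThread() further down follow a common
// background-poller shape: an atomic exit flag checked each iteration, a
// fixed sleep between polls, and a join in the destructor so shutdown waits
// for the worker. A self-contained sketch of that shape (PollOnce() is a
// placeholder for the cache/GPU/CPU polling done in this file):
#include <atomic>
#include <chrono>
#include <cstdint>
#include <memory>
#include <thread>

class Poller {
 public:
  explicit Poller(uint64_t interval_ms) {
    exit_.store(false);
    thread_.reset(new std::thread([this, interval_ms] {
      while (!exit_.load()) {
        std::this_thread::sleep_for(std::chrono::milliseconds(interval_ms));
        PollOnce();
      }
    }));
  }
  ~Poller() {
    exit_.store(true);  // signal the worker ...
    thread_->join();    // ... and wait for it before tearing anything down
  }

 private:
  void PollOnce() { /* gather metrics */ }

  std::atomic<bool> exit_;
  std::unique_ptr<std::thread> thread_;
};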
- if (poll_thread_ != nullptr) { - poll_thread_exit_.store(true); - poll_thread_->join(); -#ifdef TRITON_ENABLE_METRICS_GPU - if (dcgm_metadata_.dcgm_initialized_) { - dcgmReturn_t derr; - // Group destroy will return an error if groupId invalid or dcgm not - // initialized or configured correctly - derr = dcgmGroupDestroy( - dcgm_metadata_.dcgm_handle_, dcgm_metadata_.groupId_); - if (derr != DCGM_ST_OK) { - LOG_WARNING << "Unable to destroy DCGM group: " << errorString(derr); - } - - // Stop and shutdown DCGM - if (dcgm_metadata_.standalone_) { - derr = dcgmDisconnect(dcgm_metadata_.dcgm_handle_); - } else { - derr = dcgmStopEmbedded(dcgm_metadata_.dcgm_handle_); - } - if (derr != DCGM_ST_OK) { - LOG_WARNING << "Unable to stop DCGM: " << errorString(derr); - } - derr = dcgmShutdown(); - if (derr != DCGM_ST_OK) { - LOG_WARNING << "Unable to shutdown DCGM: " << errorString(derr); - } - } -#endif // TRITON_ENABLE_METRICS_GPU - } -} - -bool -Metrics::Enabled() -{ - auto singleton = GetSingleton(); - return singleton->metrics_enabled_; -} - -void -Metrics::EnableMetrics() -{ - auto singleton = GetSingleton(); - singleton->metrics_enabled_ = true; -} - -void -Metrics::EnableCacheMetrics( - std::shared_ptr response_cache) -{ - auto singleton = GetSingleton(); - // Ensure thread-safe enabling of Cache Metrics - std::lock_guard lock(singleton->metrics_enabling_); - if (singleton->cache_metrics_enabled_) { - return; - } - - singleton->InitializeCacheMetrics(response_cache); - singleton->cache_metrics_enabled_ = true; -} - -void -Metrics::EnableGPUMetrics() -{ - auto singleton = GetSingleton(); - // Ensure thread-safe enabling of GPU Metrics - std::lock_guard lock(singleton->metrics_enabling_); - if (singleton->gpu_metrics_enabled_) { - return; - } - - if (std::getenv("TRITON_SERVER_CPU_ONLY") == nullptr) { - singleton->InitializeDcgmMetrics(); - } - - singleton->gpu_metrics_enabled_ = true; -} - -void -Metrics::EnableCpuMetrics() -{ - auto singleton = GetSingleton(); - // Ensure thread-safe enabling of CPU Metrics - std::lock_guard lock(singleton->metrics_enabling_); - if (singleton->cpu_metrics_enabled_) { - return; - } - - singleton->InitializeCpuMetrics(); - singleton->cpu_metrics_enabled_ = true; -} - -void -Metrics::SetMetricsInterval(uint64_t metrics_interval_ms) -{ - auto singleton = GetSingleton(); - singleton->metrics_interval_ms_ = metrics_interval_ms; -} - -void -Metrics::StartPollingThreadSingleton( - std::shared_ptr response_cache) -{ - auto singleton = GetSingleton(); - - // Ensure thread-safe start of polling thread - std::lock_guard lock(singleton->poll_thread_starting_); - if (singleton->poll_thread_started_) { - return; - } - - // Start thread for polling cache/dcgm metrics - singleton->StartPollingThread(response_cache); - - // Toggle flag so this function is only executed once - singleton->poll_thread_started_ = true; -} - -bool -Metrics::StartPollingThread( - std::shared_ptr response_cache) -{ - // Nothing to poll if no polling metrics enabled, don't spawn a thread - if (!cache_metrics_enabled_ && !gpu_metrics_enabled_ && - !cpu_metrics_enabled_) { - LOG_WARNING << "No polling metrics (CPU, GPU, Cache) are enabled. 
Will not " - "poll for them."; - return false; - } - poll_thread_exit_.store(false); - - // Start a separate thread for polling metrics at specified interval - poll_thread_.reset(new std::thread([this, response_cache] { - // Thread will update metrics indefinitely until exit flag set - while (!poll_thread_exit_.load()) { - // Sleep for metric interval - std::this_thread::sleep_for( - std::chrono::milliseconds(metrics_interval_ms_ / 2)); - - // Poll Response Cache metrics - if (cache_metrics_enabled_ && response_cache != nullptr) { - PollCacheMetrics(response_cache); - } - -#ifdef TRITON_ENABLE_METRICS_GPU - // Poll DCGM GPU metrics - if (gpu_metrics_enabled_ && - dcgm_metadata_.available_cuda_gpu_ids_.size() > 0) { - PollDcgmMetrics(); - } -#endif // TRITON_ENABLE_METRICS_GPU - -#ifdef TRITON_ENABLE_METRICS_CPU - if (cpu_metrics_enabled_) { - PollCpuMetrics(); - } -#endif // TRITON_ENABLE_METRICS_CPU - } - })); - - return true; -} - -bool -Metrics::PollCacheMetrics(std::shared_ptr response_cache) -{ - if (response_cache == nullptr) { - LOG_WARNING << "error polling cache metrics, cache metrics will not be " - << "available: cache was nullptr"; - return false; - } - - // Update global cache metrics - cache_num_entries_global_->Set(response_cache->NumEntries()); - cache_num_lookups_global_->Set(response_cache->NumLookups()); - cache_num_hits_global_->Set(response_cache->NumHits()); - cache_num_misses_global_->Set(response_cache->NumMisses()); - cache_num_evictions_global_->Set(response_cache->NumEvictions()); - cache_lookup_duration_us_global_->Set( - response_cache->TotalLookupLatencyNs() / 1000); - cache_insertion_duration_us_global_->Set( - response_cache->TotalInsertionLatencyNs() / 1000); - cache_util_global_->Set(response_cache->TotalUtilization()); - return true; -} - -#ifdef TRITON_ENABLE_METRICS_CPU -Status -Metrics::ParseCpuInfo(CpuInfo& info) -{ -#ifdef _WIN32 - return Status( - Status::Code::INTERNAL, "CPU metrics not supported on Windows."); -#else - std::ifstream ifs("/proc/stat"); - if (!ifs.good()) { - return Status(Status::Code::INTERNAL, "Failed to open /proc/stat."); - } - - std::string line; - // Verify first line is aggregate cpu line - std::getline(ifs, line); - if (line.rfind("cpu ", 0) == std::string::npos) { - return Status( - Status::Code::INTERNAL, - "Failed to find aggregate CPU info in /proc/stat."); - } - - std::string _; - std::istringstream iss(line); - // Use _ to skip "cpu" at start of line - if (!(iss >> _ >> info)) { - return Status( - Status::Code::INTERNAL, - "Failed to parse aggregate CPU info in /proc/stat."); - } - return Status::Success; -#endif // OS -} - -Status -Metrics::ParseMemInfo(MemInfo& info) -{ -#ifdef _WIN32 - return Status( - Status::Code::INTERNAL, "Memory metrics not supported on Windows."); -#else - std::ifstream ifs("/proc/meminfo"); - if (!ifs.good()) { - return Status(Status::Code::INTERNAL, "Failed to open /proc/meminfo."); - } - - std::string line; - constexpr uint64_t KB = 1024; - while (std::getline(ifs, line)) { - std::istringstream iss(line); - std::string name; - uint64_t value = 0; - if (iss >> name >> value) { - name.pop_back(); - info[name] = value * KB; - } else { - return Status( - Status::Code::INTERNAL, "Encountered error parsing /proc/meminfo."); - } - } - - if (info.find("MemTotal") == info.end() || - info.find("MemAvailable") == info.end()) { - return Status( - Status::Code::INTERNAL, - "Failed to find desired values in /proc/meminfo."); - } - - if (info["MemAvailable"] > info["MemTotal"]) { - return Status( - 
Status::Code::INTERNAL, - "Available bytes shouldn't be greater than Total bytes"); - } - - // "Used" memory can be defined in many different ways. While many - // older applications consider "used = total - (free + cached)", a more - // accurate measure of available memory "MemAvailable" was added, - // so we choose "used = total - available" for a more accurate measure. - // This may change in the future if not sufficient for most use cases. - // See https://stackoverflow.com/a/35019697. - info["MemUsed"] = info["MemTotal"] - info["MemAvailable"]; - - return Status::Success; -#endif // OS -} - -double -Metrics::CpuUtilization(const CpuInfo& info_new, const CpuInfo& info_old) -{ - // Account for overflow - const auto wrap_sub = [](uint64_t a, uint64_t b) { - return (a > b) ? (a - b) : 0; - }; - uint64_t util_diff = wrap_sub(info_new.user, info_old.user) + - wrap_sub(info_new.nice, info_old.nice) + - wrap_sub(info_new.system, info_old.system) + - wrap_sub(info_new.irq, info_old.irq) + - wrap_sub(info_new.softirq, info_old.softirq) + - wrap_sub(info_new.steal, info_old.steal); - uint64_t idle_diff = wrap_sub(info_new.idle, info_old.idle) + - wrap_sub(info_new.iowait, info_old.iowait); - double util_ratio = static_cast(util_diff) / (util_diff + idle_diff); - return util_ratio; -} -#endif // TRITON_ENABLE_METRICS_CPU - -bool -Metrics::PollCpuMetrics() -{ -#ifndef TRITON_ENABLE_METRICS_CPU - return false; -#else - // CPU Utilization - double cpu_util = 0.0; - auto cpu_info = CpuInfo(); - auto status = ParseCpuInfo(cpu_info); - if (status.IsOk()) { - cpu_util = CpuUtilization(cpu_info, last_cpu_info_); - last_cpu_info_ = cpu_info; - } - cpu_utilization_->Set(cpu_util); // [0.0, 1.0] - - // RAM / Memory - double mem_total_bytes = 0.0; - double mem_used_bytes = 0.0; - auto mem_info = MemInfo(); - status = ParseMemInfo(mem_info); - if (status.IsOk()) { - // MemTotal will usually not change over time, but if something - // goes wrong when querying memory, we can reflect that by updating. 
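// Rough standalone sketch (names hypothetical, not from the Triton sources)
// of the /proc/meminfo handling above: values are reported in kB, and "used"
// is taken as MemTotal - MemAvailable, as the comment in ParseMemInfo explains.
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>

int main() {
  std::unordered_map<std::string, std::uint64_t> mem;  // field name -> bytes
  std::ifstream ifs("/proc/meminfo");
  std::string line;
  while (std::getline(ifs, line)) {
    std::istringstream iss(line);
    std::string name;
    std::uint64_t kb = 0;
    if (iss >> name >> kb) {
      name.pop_back();        // drop the trailing ':' from e.g. "MemTotal:"
      mem[name] = kb * 1024;  // convert kB to bytes
    }
  }
  if ((mem.count("MemTotal") != 0) && (mem.count("MemAvailable") != 0)) {
    std::cout << "used bytes: " << (mem["MemTotal"] - mem["MemAvailable"]) << "\n";
  }
  return 0;
}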
- mem_total_bytes = mem_info["MemTotal"]; - mem_used_bytes = mem_info["MemUsed"]; - } - - cpu_memory_total_->Set(mem_total_bytes); - cpu_memory_used_->Set(mem_used_bytes); - - return true; -#endif // TRITON_ENABLE_METRICS_CPU -} - -bool -Metrics::PollDcgmMetrics() -{ -#ifndef TRITON_ENABLE_METRICS_GPU - return false; -#else - - if (dcgm_metadata_.available_cuda_gpu_ids_.size() == 0) { - LOG_WARNING << "error polling GPU metrics, GPU metrics will not be " - << "available: no available gpus to poll"; - return false; - } - - dcgmUpdateAllFields(dcgm_metadata_.dcgm_handle_, 1 /* wait for update*/); - for (unsigned int didx = 0; - didx < dcgm_metadata_.available_cuda_gpu_ids_.size(); ++didx) { - uint32_t cuda_id = dcgm_metadata_.available_cuda_gpu_ids_[didx]; - if (dcgm_metadata_.cuda_ids_to_dcgm_ids_.count(cuda_id) <= 0) { - LOG_WARNING << "Cannot find DCGM id for CUDA id " << cuda_id; - continue; - } - uint32_t dcgm_id = dcgm_metadata_.cuda_ids_to_dcgm_ids_.at(cuda_id); - dcgmFieldValue_v1 field_values[dcgm_metadata_.field_count_]; - dcgmReturn_t dcgmerr = dcgmGetLatestValuesForFields( - dcgm_metadata_.dcgm_handle_, dcgm_id, dcgm_metadata_.fields_.data(), - dcgm_metadata_.field_count_, field_values); - - if (dcgmerr != DCGM_ST_OK) { - dcgm_metadata_.power_limit_fail_cnt_[didx]++; - dcgm_metadata_.power_usage_fail_cnt_[didx]++; - dcgm_metadata_.energy_fail_cnt_[didx]++; - dcgm_metadata_.util_fail_cnt_[didx]++; - dcgm_metadata_.mem_fail_cnt_[didx]++; - LOG_WARNING << "Unable to get field values for GPU ID " << cuda_id << ": " - << errorString(dcgmerr); - } else { - // Power limit - if (dcgm_metadata_.power_limit_fail_cnt_[didx] < - dcgm_metadata_.fail_threshold_) { - double power_limit = field_values[0].value.dbl; - if ((field_values[0].status == DCGM_ST_OK) && - (!DCGM_FP64_IS_BLANK(power_limit))) { - dcgm_metadata_.power_limit_fail_cnt_[didx] = 0; - } else { - dcgm_metadata_.power_limit_fail_cnt_[didx]++; - power_limit = 0; - dcgmReturn_t status = dcgmReturn_t(field_values[0].status); - LOG_WARNING << "Unable to get power limit for GPU " << cuda_id - << ". Status:" << errorString(status) - << ", value:" << dcgmValueToErrorMessage(power_limit); - } - gpu_power_limit_[didx]->Set(power_limit); - } - - // Power usage - if (dcgm_metadata_.power_usage_fail_cnt_[didx] < - dcgm_metadata_.fail_threshold_) { - double power_usage = field_values[1].value.dbl; - if ((field_values[1].status == DCGM_ST_OK) && - (!DCGM_FP64_IS_BLANK(power_usage))) { - dcgm_metadata_.power_usage_fail_cnt_[didx] = 0; - } else { - dcgm_metadata_.power_usage_fail_cnt_[didx]++; - power_usage = 0; - dcgmReturn_t status = dcgmReturn_t(field_values[1].status); - LOG_WARNING << "Unable to get power usage for GPU " << cuda_id - << ". 
Status:" << errorString(status) - << ", value:" << dcgmValueToErrorMessage(power_usage); - } - gpu_power_usage_[didx]->Set(power_usage); - } - - // Energy Consumption - if (dcgm_metadata_.energy_fail_cnt_[didx] < - dcgm_metadata_.fail_threshold_) { - int64_t energy = field_values[2].value.i64; - if ((field_values[2].status == DCGM_ST_OK) && - (!DCGM_INT64_IS_BLANK(energy))) { - dcgm_metadata_.energy_fail_cnt_[didx] = 0; - if (dcgm_metadata_.last_energy_[didx] == 0) { - dcgm_metadata_.last_energy_[didx] = energy; - } - gpu_energy_consumption_[didx]->Increment( - (double)(energy - dcgm_metadata_.last_energy_[didx]) * 0.001); - dcgm_metadata_.last_energy_[didx] = energy; - } else { - dcgm_metadata_.energy_fail_cnt_[didx]++; - energy = 0; - dcgmReturn_t status = dcgmReturn_t(field_values[2].status); - LOG_WARNING << "Unable to get energy consumption for " - << "GPU " << cuda_id << ". Status:" << errorString(status) - << ", value:" << dcgmValueToErrorMessage(energy); - } - } - - // Utilization - if (dcgm_metadata_.util_fail_cnt_[didx] < - dcgm_metadata_.fail_threshold_) { - int64_t util = field_values[3].value.i64; - if ((field_values[3].status == DCGM_ST_OK) && - (!DCGM_INT64_IS_BLANK(util))) { - dcgm_metadata_.util_fail_cnt_[didx] = 0; - } else { - dcgm_metadata_.util_fail_cnt_[didx]++; - util = 0; - dcgmReturn_t status = dcgmReturn_t(field_values[3].status); - LOG_WARNING << "Unable to get GPU utilization for GPU " << cuda_id - << ". Status:" << errorString(status) - << ", value:" << dcgmValueToErrorMessage(util); - } - gpu_utilization_[didx]->Set((double)util * 0.01); - } - - // Memory Usage - if (dcgm_metadata_.mem_fail_cnt_[didx] < dcgm_metadata_.fail_threshold_) { - int64_t memory_used = field_values[4].value.i64; - int64_t memory_total = field_values[5].value.i64; - if ((field_values[4].status == DCGM_ST_OK) && - (!DCGM_INT64_IS_BLANK(memory_used)) && - (field_values[5].status == DCGM_ST_OK) && - (!DCGM_INT64_IS_BLANK(memory_total))) { - dcgm_metadata_.mem_fail_cnt_[didx] = 0; - } else { - memory_total = 0; - memory_used = 0; - dcgm_metadata_.mem_fail_cnt_[didx]++; - dcgmReturn_t usageStatus = dcgmReturn_t(field_values[4].status); - dcgmReturn_t memoryTotaltatus = dcgmReturn_t(field_values[5].status); - LOG_WARNING << "Unable to get memory usage for GPU " << cuda_id - << ". Memory usage status:" << errorString(usageStatus) - << ", value:" << dcgmValueToErrorMessage(memory_used) - << ". 
Memory total status:" - << errorString(memoryTotaltatus) - << ", value:" << dcgmValueToErrorMessage(memory_total); - } - gpu_memory_total_[didx]->Set(memory_total * 1024 * 1024); // bytes - gpu_memory_used_[didx]->Set(memory_used * 1024 * 1024); // bytes - } - } - } - return true; -#endif // TRITON_ENABLE_METRICS_GPU -} - -bool -Metrics::InitializeCacheMetrics( - std::shared_ptr response_cache) -{ - if (response_cache == nullptr) { - LOG_WARNING - << "error initializing cache metrics, cache metrics will not be " - << "available: cache was nullptr"; - return false; - } - - const std::map cache_labels; - cache_num_entries_global_ = &cache_num_entries_family_.Add(cache_labels); - cache_num_lookups_global_ = &cache_num_lookups_family_.Add(cache_labels); - cache_num_hits_global_ = &cache_num_hits_family_.Add(cache_labels); - cache_num_misses_global_ = &cache_num_misses_family_.Add(cache_labels); - cache_num_evictions_global_ = &cache_num_evictions_family_.Add(cache_labels); - cache_lookup_duration_us_global_ = - &cache_lookup_duration_us_family_.Add(cache_labels); - cache_insertion_duration_us_global_ = - &cache_insertion_duration_us_family_.Add(cache_labels); - cache_util_global_ = &cache_util_family_.Add(cache_labels); - LOG_INFO << "Collecting Response Cache metrics"; - return true; -} - -bool -Metrics::InitializeCpuMetrics() -{ -#ifndef TRITON_ENABLE_METRICS_CPU - return false; -#else - const std::map cpu_labels; - cpu_utilization_ = &cpu_utilization_family_.Add(cpu_labels); - cpu_memory_total_ = &cpu_memory_total_family_.Add(cpu_labels); - cpu_memory_used_ = &cpu_memory_used_family_.Add(cpu_labels); - - // Get baseline CPU info for future comparisons - last_cpu_info_ = CpuInfo(); - auto status = ParseCpuInfo(last_cpu_info_); - if (!status.IsOk()) { - LOG_WARNING << "error initializing CPU metrics, CPU utilization may not " - "be available: " - << status.Message(); - return false; - } - - // Verify memory metrics can be parsed - auto mem_info = MemInfo(); - status = ParseMemInfo(mem_info); - if (!status.IsOk()) { - LOG_WARNING << "error initializing CPU metrics, CPU memory metrics may not " - "be available: " - << status.Message(); - return false; - } - - LOG_INFO << "Collecting CPU metrics"; - return true; -#endif // TRITON_ENABLE_METRICS_CPU -} - -bool -Metrics::InitializeDcgmMetrics() -{ -#ifndef TRITON_ENABLE_METRICS_GPU - return false; -#else - dcgmReturn_t dcgmerr = dcgmInit(); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "error initializing DCGM, GPU metrics will not be " - << "available: " << errorString(dcgmerr); - return false; - } - - if (dcgm_metadata_.standalone_) { - char hostIpAddress[16] = {0}; - std::string ipAddress = "127.0.0.1"; - strncpy(hostIpAddress, ipAddress.c_str(), 15); - dcgmerr = dcgmConnect(hostIpAddress, &dcgm_metadata_.dcgm_handle_); - } else { - dcgmerr = dcgmStartEmbedded( - DCGM_OPERATION_MODE_MANUAL, &dcgm_metadata_.dcgm_handle_); - } - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "DCGM unable to start: " << errorString(dcgmerr); - return false; - } else { - // Set this flag to signal DCGM cleanup in destructor - dcgm_metadata_.dcgm_initialized_ = true; - } - - if (dcgm_metadata_.standalone_) { - dcgmerr = dcgmUpdateAllFields(dcgm_metadata_.dcgm_handle_, 1); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "DCGM unable to update all fields, GPU metrics will " - "not be available: " - << errorString(dcgmerr); - return false; - } - } - - unsigned int dcgm_gpu_ids[DCGM_MAX_NUM_DEVICES]; - int dcgm_gpu_count; - dcgmerr = dcgmGetAllDevices( - 
dcgm_metadata_.dcgm_handle_, dcgm_gpu_ids, &dcgm_gpu_count); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "DCGM unable to get device info and count, GPU " - "metrics will not be available: " - << errorString(dcgmerr); - return false; - } - - // Get PCI Bus ID to DCGM device Id map. - // Some devices may have problems using DCGM API and - // these devices needs to be ignored. - std::map pci_bus_id_to_dcgm_id; - std::map > - pci_bus_id_to_gpu_labels; - std::map pci_bus_id_to_device_name; - dcgmDeviceAttributes_t gpu_attributes[DCGM_MAX_NUM_DEVICES]; - for (int i = 0; i < dcgm_gpu_count; i++) { - gpu_attributes[i].version = dcgmDeviceAttributes_version; - dcgmerr = dcgmGetDeviceAttributes( - dcgm_metadata_.dcgm_handle_, dcgm_gpu_ids[i], &gpu_attributes[i]); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "DCGM unable to get device properties for DCGM device " - << dcgm_gpu_ids[i] - << ", GPU metrics will not be available for this device: " - << errorString(dcgmerr); - } else { - std::string pciBusId = gpu_attributes[i].identifiers.pciBusId; - pci_bus_id_to_dcgm_id[pciBusId] = i; - pci_bus_id_to_device_name[pciBusId] = - std::string(gpu_attributes[i].identifiers.deviceName); - std::map gpu_labels; - gpu_labels.insert(std::map::value_type( - kMetricsLabelGpuUuid, - std::string(gpu_attributes[i].identifiers.uuid))); - pci_bus_id_to_gpu_labels[pciBusId] = gpu_labels; - } - } - - - // Get CUDA-visible PCI Bus Ids and get DCGM metrics for each CUDA-visible GPU - int cuda_gpu_count; - cudaError_t cudaerr = cudaGetDeviceCount(&cuda_gpu_count); - if (cudaerr != cudaSuccess) { - LOG_WARNING - << "Cannot get CUDA device count, GPU metrics will not be available"; - return false; - } - for (int i = 0; i < cuda_gpu_count; ++i) { - std::string pci_bus_id = "0000"; // pad 0's for uniformity - char pcibusid_str[64]; - cudaerr = cudaDeviceGetPCIBusId(pcibusid_str, sizeof(pcibusid_str) - 1, i); - if (cudaerr == cudaSuccess) { - pci_bus_id.append(pcibusid_str); - if (pci_bus_id_to_dcgm_id.count(pci_bus_id) <= 0) { - LOG_INFO << "Skipping GPU:" << i - << " since it's not CUDA enabled. 
This should never happen!"; - continue; - } - // Filter out CUDA visible GPUs from GPUs found by DCGM - LOG_INFO << "Collecting metrics for GPU " << i << ": " - << pci_bus_id_to_device_name[pci_bus_id]; - auto& gpu_labels = pci_bus_id_to_gpu_labels[pci_bus_id]; - gpu_utilization_.push_back(&gpu_utilization_family_.Add(gpu_labels)); - gpu_memory_total_.push_back(&gpu_memory_total_family_.Add(gpu_labels)); - gpu_memory_used_.push_back(&gpu_memory_used_family_.Add(gpu_labels)); - gpu_power_usage_.push_back(&gpu_power_usage_family_.Add(gpu_labels)); - gpu_power_limit_.push_back(&gpu_power_limit_family_.Add(gpu_labels)); - gpu_energy_consumption_.push_back( - &gpu_energy_consumption_family_.Add(gpu_labels)); - uint32_t dcgm_id = pci_bus_id_to_dcgm_id[pci_bus_id]; - dcgm_metadata_.cuda_ids_to_dcgm_ids_[i] = dcgm_id; - dcgm_metadata_.available_cuda_gpu_ids_.emplace_back(i); - } else { - LOG_WARNING << "GPU metrics will not be available for device:" << i; - } - } - - // create a gpu group - char groupName[] = "dcgm_group"; - dcgmerr = dcgmGroupCreate( - dcgm_metadata_.dcgm_handle_, DCGM_GROUP_DEFAULT, groupName, - &dcgm_metadata_.groupId_); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "Cannot make GPU group: " << errorString(dcgmerr); - } - - // Initialize tracking vectors - for (unsigned int didx = 0; - didx < dcgm_metadata_.available_cuda_gpu_ids_.size(); ++didx) { - dcgm_metadata_.power_limit_fail_cnt_.push_back(0); - dcgm_metadata_.power_usage_fail_cnt_.push_back(0); - dcgm_metadata_.energy_fail_cnt_.push_back(0); - dcgm_metadata_.util_fail_cnt_.push_back(0); - dcgm_metadata_.mem_fail_cnt_.push_back(0); - dcgm_metadata_.last_energy_.push_back(0); - } - - // Number of fields for DCGM to use from fields_ below - dcgm_metadata_.field_count_ = 6; - unsigned short util_flag = dcgm_metadata_.standalone_ - ? DCGM_FI_PROF_GR_ENGINE_ACTIVE - : DCGM_FI_DEV_GPU_UTIL; - dcgm_metadata_.fields_ = { - DCGM_FI_DEV_POWER_MGMT_LIMIT, // power limit, watts - DCGM_FI_DEV_POWER_USAGE, // power usage, watts - DCGM_FI_DEV_TOTAL_ENERGY_CONSUMPTION, // Total energy consumption, mJ - util_flag, // util ratio, 1 = 1% - DCGM_FI_DEV_FB_USED, // Frame buffer used, MiB - DCGM_FI_DEV_FB_TOTAL, // Frame buffer used, MiB - }; - - char fieldName[] = "field_group"; - dcgmFieldGrp_t fieldGroupId; - dcgmerr = dcgmFieldGroupCreate( - dcgm_metadata_.dcgm_handle_, dcgm_metadata_.field_count_, - dcgm_metadata_.fields_.data(), fieldName, &fieldGroupId); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "Cannot make field group: " << errorString(dcgmerr); - } - - dcgmerr = dcgmWatchFields( - dcgm_metadata_.dcgm_handle_, dcgm_metadata_.groupId_, fieldGroupId, - metrics_interval_ms_ * 1000 /*update period, usec*/, - 5.0 /*maxKeepAge, sec*/, 5 /*maxKeepSamples*/); - if (dcgmerr != DCGM_ST_OK) { - LOG_WARNING << "Cannot start watching fields: " << errorString(dcgmerr); - return false; - } - - return true; -#endif // TRITON_ENABLE_METRICS_GPU -} - -#ifdef TRITON_ENABLE_METRICS_GPU -std::string -Metrics::dcgmValueToErrorMessage(double val) -{ - if (DCGM_FP64_IS_BLANK(val)) { - if (val == DCGM_FP64_BLANK) { - return "Not Specified"; - } else if (val == DCGM_FP64_NOT_FOUND) { - return "Not Found"; - } else if (val == DCGM_FP64_NOT_SUPPORTED) { - return "Not Supported"; - } else if (val == DCGM_FP64_NOT_PERMISSIONED) { - return "Insf. 
Permission"; - } else { - return "Unknown"; - } - } else { - return std::to_string(val); - } -} - -std::string -Metrics::dcgmValueToErrorMessage(int64_t val) -{ - if (DCGM_INT64_IS_BLANK(val)) { - switch (val) { - case DCGM_INT64_BLANK: - return "Not Specified"; - case DCGM_INT64_NOT_FOUND: - return "Not Found"; - case DCGM_INT64_NOT_SUPPORTED: - return "Not Supported"; - case DCGM_INT64_NOT_PERMISSIONED: - return "Insf. Permission"; - default: - return "Unknown"; - } - } else { - return std::to_string(val); - } -} -#endif // TRITON_ENABLE_METRICS_GPU - -bool -Metrics::UUIDForCudaDevice(int cuda_device, std::string* uuid) -{ - // If metrics were not initialized then just silently fail since - // with DCGM we can't get the CUDA device (and not worth doing - // anyway since metrics aren't being reported). - auto singleton = GetSingleton(); - if (!singleton->gpu_metrics_enabled_) { - return false; - } - - // If GPU metrics is not enabled just silently fail. -#ifndef TRITON_ENABLE_METRICS_GPU - return false; -#else - - dcgmDeviceAttributes_t gpu_attributes; - gpu_attributes.version = dcgmDeviceAttributes_version; - dcgmReturn_t dcgmerr = dcgmGetDeviceAttributes( - singleton->dcgm_metadata_.dcgm_handle_, cuda_device, &gpu_attributes); - if (dcgmerr != DCGM_ST_OK) { - LOG_ERROR << "Unable to get device UUID: " << errorString(dcgmerr); - return false; - } - - *uuid = gpu_attributes.identifiers.uuid; - return true; -#endif // TRITON_ENABLE_METRICS_GPU -} - -std::shared_ptr -Metrics::GetRegistry() -{ - auto singleton = Metrics::GetSingleton(); - return singleton->registry_; -} - -const std::string -Metrics::SerializedMetrics() -{ - auto singleton = Metrics::GetSingleton(); - return singleton->serializer_->Serialize( - singleton->registry_.get()->Collect()); -} - -Metrics* -Metrics::GetSingleton() -{ - static Metrics singleton; - return &singleton; -} - -}} // namespace triton::core - -#endif // TRITON_ENABLE_METRICS diff --git a/3rdparty/core-r22.12/src/metrics.h b/3rdparty/core-r22.12/src/metrics.h deleted file mode 100644 index 9b7e8f4a168f3def67cc91d121f39e15178fd6d2..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/metrics.h +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#pragma once - -#ifdef TRITON_ENABLE_METRICS - -#include -#include -#include -#include "prometheus/counter.h" -#include "prometheus/gauge.h" -#include "prometheus/registry.h" -#include "prometheus/serializer.h" -#include "prometheus/text_serializer.h" -#include "response_cache.h" - -#ifdef TRITON_ENABLE_METRICS_GPU -#include -#endif // TRITON_ENABLE_METRICS_GPU - -namespace triton { namespace core { - -#ifdef TRITON_ENABLE_METRICS_CPU -using MemInfo = std::unordered_map; - -// References: -// - htop source: https://stackoverflow.com/a/23376195 -// - Linux docs: https://www.kernel.org/doc/Documentation/filesystems/proc.txt -// guest/guestnice values are counted in user/nice so we skip parsing them -struct CpuInfo { - uint64_t user = 0; // normal processes executing in user mode - uint64_t nice = 0; // niced processes executing in user mode - uint64_t system = 0; // processes executing in kernel mode - uint64_t idle = 0; // twiddling thumbs - uint64_t iowait = 0; // waiting for I/O to complete - uint64_t irq = 0; // servicing interrupts - uint64_t softirq = 0; // servicing softirqs - uint64_t steal = 0; // involuntary wait -}; - -inline std::istream& -operator>>(std::istream& is, CpuInfo& info) -{ - is >> info.user >> info.nice >> info.system >> info.idle >> info.iowait >> - info.irq >> info.softirq >> info.steal; - return is; -} -#endif // TRITON_ENABLE_METRICS_CPU - -#ifdef TRITON_ENABLE_METRICS_GPU -struct DcgmMetadata { - // DCGM handles for initialization and destruction - dcgmHandle_t dcgm_handle_ = 0; - dcgmGpuGrp_t groupId_ = 0; - // DCGM Flags - bool standalone_ = false; - // DCGM Fields - size_t field_count_ = 0; - std::vector fields_; - // GPU Device Mapping - std::map cuda_ids_to_dcgm_ids_; - std::vector available_cuda_gpu_ids_; - // Stop attempting metrics if they fail multiple consecutive - // times for a device. - const int fail_threshold_ = 3; - // DCGM Failure Tracking - std::vector power_limit_fail_cnt_; - std::vector power_usage_fail_cnt_; - std::vector energy_fail_cnt_; - std::vector util_fail_cnt_; - std::vector mem_fail_cnt_; - // DCGM Energy Tracking - std::vector last_energy_; - // Track if DCGM handle initialized successfully - bool dcgm_initialized_ = false; -}; -#endif // TRITON_ENABLE_METRICS_GPU - -class Metrics { - public: - // Return the hash value of the labels - static size_t HashLabels(const std::map& labels); - - // Are metrics enabled? 
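// Minimal sketch of the prometheus-cpp plumbing this class wraps
// (illustrative only; the metric name below is made up): build a family on a
// registry, add a labeled gauge, set it, and serialize the registry to the
// text exposition format, mirroring the calls used in the implementation.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include "prometheus/gauge.h"
#include "prometheus/registry.h"
#include "prometheus/text_serializer.h"

int main() {
  auto registry = std::make_shared<prometheus::Registry>();
  auto& family = prometheus::BuildGauge()
                     .Name("example_util")
                     .Help("Example utilization [0.0 - 1.0]")
                     .Register(*registry);
  auto& gauge = family.Add(std::map<std::string, std::string>{});
  gauge.Set(0.5);
  std::cout << prometheus::TextSerializer().Serialize(registry->Collect());
  return 0;
}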
- static bool Enabled(); - - // Enable reporting of metrics - static void EnableMetrics(); - - // Enable reporting of GPU metrics - static void EnableGPUMetrics(); - - // Enable reporting of CPU metrics - static void EnableCpuMetrics(); - - // Enable reporting of Cache metrics - static void EnableCacheMetrics( - std::shared_ptr response_cache); - - // Start a thread for polling enabled metrics if any - static void StartPollingThreadSingleton( - std::shared_ptr response_cache); - - // Set the time interval in secs at which metrics are collected - static void SetMetricsInterval(uint64_t metrics_interval_ms); - - // Get the prometheus registry - static std::shared_ptr GetRegistry(); - - // Get serialized metrics - static const std::string SerializedMetrics(); - - // Get the UUID for a CUDA device. Return true and initialize 'uuid' - // if a UUID is found, return false if a UUID cannot be returned. - static bool UUIDForCudaDevice(int cuda_device, std::string* uuid); - - // Metric family counting successful inference requests - static prometheus::Family& FamilyInferenceSuccess() - { - return GetSingleton()->inf_success_family_; - } - - // Metric family counting failed inference requests - static prometheus::Family& FamilyInferenceFailure() - { - return GetSingleton()->inf_failure_family_; - } - - // Metric family counting inferences performed, where a batch-size - // 'n' inference request is counted as 'n' inferences - static prometheus::Family& FamilyInferenceCount() - { - return GetSingleton()->inf_count_family_; - } - - // Metric family counting inferences performed, where a batch-size - // 'n' inference request is counted as 'n' inferences - static prometheus::Family& - FamilyInferenceExecutionCount() - { - return GetSingleton()->inf_count_exec_family_; - } - - // Metric family of cumulative inference request duration, in - // microseconds - static prometheus::Family& - FamilyInferenceRequestDuration() - { - return GetSingleton()->inf_request_duration_us_family_; - } - - // Metric family of cumulative inference queuing duration, in - // microseconds - static prometheus::Family& FamilyInferenceQueueDuration() - { - return GetSingleton()->inf_queue_duration_us_family_; - } - - // Metric family of cumulative inference compute durations, in - // microseconds - static prometheus::Family& - FamilyInferenceComputeInputDuration() - { - return GetSingleton()->inf_compute_input_duration_us_family_; - } - static prometheus::Family& - FamilyInferenceComputeInferDuration() - { - return GetSingleton()->inf_compute_infer_duration_us_family_; - } - static prometheus::Family& - FamilyInferenceComputeOutputDuration() - { - return GetSingleton()->inf_compute_output_duration_us_family_; - } - // Metric families of per-model response cache metrics - static prometheus::Family& FamilyCacheHitCount() - { - return GetSingleton()->cache_num_hits_model_family_; - } - static prometheus::Family& FamilyCacheHitLookupDuration() - { - return GetSingleton()->cache_hit_lookup_duration_us_model_family_; - } - static prometheus::Family& FamilyCacheMissCount() - { - return GetSingleton()->cache_num_misses_model_family_; - } - static prometheus::Family& - FamilyCacheMissLookupDuration() - { - return GetSingleton()->cache_miss_lookup_duration_us_model_family_; - } - static prometheus::Family& - FamilyCacheMissInsertionDuration() - { - return GetSingleton()->cache_miss_insertion_duration_us_model_family_; - } - - - private: - Metrics(); - virtual ~Metrics(); - static Metrics* GetSingleton(); - bool InitializeDcgmMetrics(); - bool 
InitializeCpuMetrics(); - bool InitializeCacheMetrics( - std::shared_ptr response_cache); - bool StartPollingThread(std::shared_ptr response_cache); - bool PollCacheMetrics(std::shared_ptr response_cache); - bool PollDcgmMetrics(); - bool PollCpuMetrics(); - - std::string dcgmValueToErrorMessage(double val); - std::string dcgmValueToErrorMessage(int64_t val); - - std::shared_ptr registry_; - std::unique_ptr serializer_; - - prometheus::Family& inf_success_family_; - prometheus::Family& inf_failure_family_; - prometheus::Family& inf_count_family_; - prometheus::Family& inf_count_exec_family_; - prometheus::Family& inf_request_duration_us_family_; - prometheus::Family& inf_queue_duration_us_family_; - prometheus::Family& - inf_compute_input_duration_us_family_; - prometheus::Family& - inf_compute_infer_duration_us_family_; - prometheus::Family& - inf_compute_output_duration_us_family_; - // Global Response Cache metrics - prometheus::Family& cache_num_entries_family_; - prometheus::Family& cache_num_lookups_family_; - prometheus::Family& cache_num_hits_family_; - prometheus::Family& cache_num_misses_family_; - prometheus::Family& cache_num_evictions_family_; - prometheus::Family& cache_lookup_duration_us_family_; - prometheus::Family& cache_insertion_duration_us_family_; - prometheus::Family& cache_util_family_; - // Gauges for Global Response Cache metrics - prometheus::Gauge* cache_num_entries_global_; - prometheus::Gauge* cache_num_lookups_global_; - prometheus::Gauge* cache_num_hits_global_; - prometheus::Gauge* cache_num_misses_global_; - prometheus::Gauge* cache_num_evictions_global_; - prometheus::Gauge* cache_lookup_duration_us_global_; - prometheus::Gauge* cache_insertion_duration_us_global_; - prometheus::Gauge* cache_util_global_; - // Per-model Response Cache metrics - prometheus::Family& cache_num_hits_model_family_; - prometheus::Family& - cache_hit_lookup_duration_us_model_family_; - prometheus::Family& cache_num_misses_model_family_; - prometheus::Family& - cache_miss_lookup_duration_us_model_family_; - prometheus::Family& - cache_miss_insertion_duration_us_model_family_; - -#ifdef TRITON_ENABLE_METRICS_GPU - prometheus::Family& gpu_utilization_family_; - prometheus::Family& gpu_memory_total_family_; - prometheus::Family& gpu_memory_used_family_; - prometheus::Family& gpu_power_usage_family_; - prometheus::Family& gpu_power_limit_family_; - prometheus::Family& gpu_energy_consumption_family_; - - std::vector gpu_utilization_; - std::vector gpu_memory_total_; - std::vector gpu_memory_used_; - std::vector gpu_power_usage_; - std::vector gpu_power_limit_; - std::vector gpu_energy_consumption_; - - DcgmMetadata dcgm_metadata_; -#endif // TRITON_ENABLE_METRICS_GPU - -#ifdef TRITON_ENABLE_METRICS_CPU - // Parses "/proc/meminfo" for metrics, currently only supported on Linux. - Status ParseMemInfo(MemInfo& info); - // Parses "/proc/stat" for metrics, currently only supported on Linux. 
- Status ParseCpuInfo(CpuInfo& info); - // Computes CPU utilization between "info_new" and "info_old" values - double CpuUtilization(const CpuInfo& info_new, const CpuInfo& info_old); - - prometheus::Family& cpu_utilization_family_; - prometheus::Family& cpu_memory_total_family_; - prometheus::Family& cpu_memory_used_family_; - - prometheus::Gauge* cpu_utilization_; - prometheus::Gauge* cpu_memory_total_; - prometheus::Gauge* cpu_memory_used_; - CpuInfo last_cpu_info_; -#endif // TRITON_ENABLE_METRICS_CPU - - // Thread for polling cache/gpu metrics periodically - std::unique_ptr poll_thread_; - std::atomic poll_thread_exit_; - bool metrics_enabled_; - bool gpu_metrics_enabled_; - bool cpu_metrics_enabled_; - bool cache_metrics_enabled_; - bool poll_thread_started_; - std::mutex metrics_enabling_; - std::mutex poll_thread_starting_; - uint64_t metrics_interval_ms_; -}; - -}} // namespace triton::core - -#endif // TRITON_ENABLE_METRICS diff --git a/3rdparty/core-r22.12/src/model.cc b/3rdparty/core-r22.12/src/model.cc deleted file mode 100644 index c59a3170c7319b926fab9dbaa95a3faeda768816..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model.cc +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "model.h" - -#include -#include -#include "constants.h" -#include "filesystem.h" -#include "infer_request.h" -#include "model_config_utils.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -Status -Model::GetInput( - const std::string& name, const inference::ModelInput** input) const -{ - const auto itr = input_map_.find(name); - if (itr == input_map_.end()) { - return Status( - Status::Code::INVALID_ARG, - "unexpected inference input '" + name + "' for model '" + Name() + "'"); - } - - *input = &itr->second; - return Status::Success; -} - -Status -Model::GetOutput( - const std::string& name, const inference::ModelOutput** output) const -{ - const auto itr = output_map_.find(name); - if (itr == output_map_.end()) { - return Status( - Status::Code::INVALID_ARG, "unexpected inference output '" + name + - "' for model '" + Name() + "'"); - } - - *output = &itr->second; - return Status::Success; -} - -Status -Model::SetModelConfig(const inference::ModelConfig& config) -{ - config_ = config; - set_model_config_ = true; - - return Status::Success; -} - -Status -Model::SetScheduler(std::unique_ptr scheduler) -{ - if (scheduler_ != nullptr) { - return Status( - Status::Code::INTERNAL, "Attempt to change scheduler not allowed"); - } - - scheduler_ = std::move(scheduler); - return Status::Success; -} - -Status -Model::Init(const bool is_config_provided) -{ - if (!set_model_config_ && !is_config_provided) { - return Status( - Status::Code::NOT_FOUND, - "model configuration is not provided for model '" + Name() + "'"); - } - - RETURN_IF_ERROR(ValidateModelConfig(config_, min_compute_capability_)); - RETURN_IF_ERROR(ValidateModelIOConfig(config_)); - - // Initialize the input map - for (const auto& io : config_.input()) { - input_map_.insert(std::make_pair(io.name(), io)); - if (!io.optional()) { - ++required_input_count_; - } - } - - // Initialize the output map and label provider for each output - label_provider_ = std::make_shared(); - for (const auto& io : config_.output()) { - output_map_.insert(std::make_pair(io.name(), io)); - - if (!io.label_filename().empty()) { - const auto label_path = JoinPath({model_dir_, io.label_filename()}); - RETURN_IF_ERROR(label_provider_->AddLabels(io.name(), label_path)); - } - } - - if (config_.has_dynamic_batching()) { - default_priority_level_ = - config_.dynamic_batching().default_priority_level(); - max_priority_level_ = config_.dynamic_batching().priority_levels(); - } else if (config_.has_ensemble_scheduling()) { - // For ensemble, allow any priority level to pass through - default_priority_level_ = 0; - max_priority_level_ = UINT32_MAX; - } else { - default_priority_level_ = 0; - max_priority_level_ = 0; - } - - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model.h b/3rdparty/core-r22.12/src/model.h deleted file mode 100644 index 240849856bf3bf0e5dc1c64fe42c40b17adc0b61..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "infer_stats.h" -#include "label_provider.h" -#include "model_config.pb.h" -#include "scheduler.h" -#include "status.h" - -namespace triton { namespace core { - -class InferenceRequest; - -// -// Interface for models that handle inference requests. -// -class Model { - public: - explicit Model( - const double min_compute_capability, const std::string& model_dir, - const int64_t version, const inference::ModelConfig& config) - : config_(config), min_compute_capability_(min_compute_capability), - version_(version), required_input_count_(0), model_dir_(model_dir), - set_model_config_(false) - { - } - virtual ~Model() {} - - // Get the name of model being served. - const std::string& Name() const { return config_.name(); } - - // Get the version of model being served. - int64_t Version() const { return version_; } - - // Get the configuration of model being served. - const inference::ModelConfig& Config() const { return config_; } - - // Get the number of required inputs - size_t RequiredInputCount() const { return required_input_count_; } - - // Get the stats collector for the model being served. - InferenceStatsAggregator* MutableStatsAggregator() - { - return &stats_aggregator_; - } - const InferenceStatsAggregator& StatsAggregator() const - { - return stats_aggregator_; - } - - // Get the model configuration for a named input. - Status GetInput( - const std::string& name, const inference::ModelInput** input) const; - - // Get the model configuration for a named output. - Status GetOutput( - const std::string& name, const inference::ModelOutput** output) const; - - // Get a label provider for the model. - const std::shared_ptr& GetLabelProvider() const - { - return label_provider_; - } - - // Initialize the instance for Triton core usage - Status Init(const bool is_config_provided); - - // Enqueue a request for execution. If Status::Success is returned - // then the model has taken ownership of the request object and so - // 'request' will be nullptr. If non-success is returned then the - // caller still retains ownership of 'request'. - Status Enqueue(std::unique_ptr& request) - { - return scheduler_->Enqueue(request); - } - - // Return the number of in-flight inferences. 
- size_t InflightInferenceCount() - { - return scheduler_->InflightInferenceCount(); - } - - // Stop processing future requests unless they are considered as in-flight. - void Stop() { scheduler_->Stop(); } - - uint32_t DefaultPriorityLevel() const { return default_priority_level_; } - - uint32_t MaxPriorityLevel() const { return max_priority_level_; } - - protected: - // Set the configuration of the model being served. - Status SetModelConfig(const inference::ModelConfig& config); - - // Explicitly set the scheduler to use for inference requests to the - // model. The scheduler can only be set once for a model. - Status SetScheduler(std::unique_ptr scheduler); - - // The scheduler to use for this model. - std::unique_ptr scheduler_; - - // Configuration of the model. - inference::ModelConfig config_; - - private: - // The minimum supported CUDA compute capability. - const double min_compute_capability_; - - // Version of the model. - int64_t version_; - - // The stats collector for the model. - InferenceStatsAggregator stats_aggregator_; - - // Label provider for this model. - std::shared_ptr label_provider_; - - size_t required_input_count_; - - // Map from input name to the model configuration for that input. - std::unordered_map input_map_; - - // Map from output name to the model configuration for that output. - std::unordered_map output_map_; - - // Path to model - std::string model_dir_; - - // The default priority level for the model. - uint32_t default_priority_level_; - - // The largest priority value for the model. - uint32_t max_priority_level_; - - // Whether or not model config has been set. - bool set_model_config_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_config_cuda.cc b/3rdparty/core-r22.12/src/model_config_cuda.cc deleted file mode 100644 index e08dfb499db1396829ffc9d2f9be0e380757b5ca..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_config_cuda.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "model_config_cuda.h" - -#include - -namespace triton { namespace core { - -int -GetCudaStreamPriority( - inference::ModelOptimizationPolicy::ModelPriority priority) -{ - // Default priority is 0 - int cuda_stream_priority = 0; - - int min, max; - cudaError_t cuerr = cudaDeviceGetStreamPriorityRange(&min, &max); - if ((cuerr != cudaErrorNoDevice) && (cuerr != cudaSuccess)) { - return 0; - } - - switch (priority) { - case inference::ModelOptimizationPolicy::PRIORITY_MAX: - cuda_stream_priority = max; - break; - case inference::ModelOptimizationPolicy::PRIORITY_MIN: - cuda_stream_priority = min; - break; - default: - cuda_stream_priority = 0; - break; - } - - return cuda_stream_priority; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_config_cuda.h b/3rdparty/core-r22.12/src/model_config_cuda.h deleted file mode 100644 index f939232a312bfab372ecfcbf644ae9c496ce8525..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_config_cuda.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "model_config.pb.h" - -namespace triton { namespace core { - -/// Get the CUDA stream priority for a given ModelPriority -/// \param priority The inference::ModelOptimizationPolicy::ModelPriority -/// priority. \param cuda_stream_priority Returns the CUDA stream priority. -/// \return The error status. 
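// Hypothetical call site (not from the deleted sources) showing how the
// priority returned by GetCudaStreamPriority() would typically be consumed:
// CUDA stream priorities are numeric, with lower values meaning higher
// priority, and are passed to cudaStreamCreateWithPriority().
#include <cuda_runtime_api.h>

cudaStream_t CreatePrioritizedStream(int cuda_stream_priority) {
  cudaStream_t stream = nullptr;
  // Error handling omitted for brevity; check the returned cudaError_t.
  cudaStreamCreateWithPriority(
      &stream, cudaStreamNonBlocking, cuda_stream_priority);
  return stream;
}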
-int GetCudaStreamPriority( - inference::ModelOptimizationPolicy::ModelPriority priority); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_config_utils.cc b/3rdparty/core-r22.12/src/model_config_utils.cc deleted file mode 100644 index 189b2df23e34e0a95f4a502acd6fea41fa4d7b22..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_config_utils.cc +++ /dev/null @@ -1,2294 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "model_config_utils.h" - -#include -#include -#include -#include -#include "constants.h" -#include "cuda_utils.h" -#include "filesystem.h" -#include "triton/common/logging.h" - -#define TRITONJSON_STATUSTYPE triton::core::Status -#define TRITONJSON_STATUSRETURN(M) \ - return triton::core::Status(triton::core::Status::Code::INTERNAL, (M)) -#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success -#include "triton/common/triton_json.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace core { - -namespace { - -#ifdef TRITON_ENABLE_ENSEMBLE - -struct EnsembleTensor { - EnsembleTensor(bool isOutput) : ready(false), isOutput(isOutput) {} - bool ready; - bool isOutput; - std::vector prev_nodes; - std::vector next_nodes; -}; - -/// Build a graph that represents the data flow in the ensemble specified in -/// given model config. the node (ensemble tensor) in the graph can be looked -/// up using its name as key. -/// \param ensemble_config The model configuration that specifies -/// ensemble_scheduling field. -/// \param keyed_ensemble_graph Returned the ensemble graph. -/// \return The error status. A non-OK status indicates the build fails because -/// the ensemble configuration is not valid. 
-Status -BuildEnsembleGraph( - const inference::ModelConfig& config, - std::unordered_map& keyed_ensemble_graph) -{ - keyed_ensemble_graph.clear(); - size_t step_idx = 0; - for (const auto& element : config.ensemble_scheduling().step()) { - if (element.model_name().empty()) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'model_name' in step " + std::to_string(step_idx) + - " of ensemble '" + config.name() + "'"); - } - if (element.input_map().size() == 0) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'input_map' in step " + std::to_string(step_idx) + - " of ensemble '" + config.name() + "'"); - } - if (element.output_map().size() == 0) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'output_map' in step " + std::to_string(step_idx) + - " of ensemble '" + config.name() + "'"); - } - - // Link ensemble tensors - std::vector tensor_as_output; - for (const auto& output_map : element.output_map()) { - auto it = keyed_ensemble_graph.find(output_map.second); - if (it != keyed_ensemble_graph.end()) { - if (it->second.isOutput) { - return Status( - Status::Code::INVALID_ARG, - "ensemble tensor '" + it->first + - "' can appear in an output map only once for ensemble '" + - config.name() + "' step " + std::to_string(step_idx)); - } else { - it->second.isOutput = true; - } - } else { - it = keyed_ensemble_graph - .emplace( - std::make_pair(output_map.second, EnsembleTensor(true))) - .first; - } - tensor_as_output.push_back(&(it->second)); - } - - std::set model_inputs; - for (const auto& input_map : element.input_map()) { - if (model_inputs.find(input_map.first) != model_inputs.end()) { - return Status( - Status::Code::INVALID_ARG, - "input '" + input_map.first + "' in model '" + - element.model_name() + - "' is mapped to multiple ensemble tensors for ensemble '" + - config.name() + "' step " + std::to_string(step_idx)); - } else { - model_inputs.emplace(input_map.first); - } - auto it = keyed_ensemble_graph.find(input_map.second); - if (it == keyed_ensemble_graph.end()) { - it = keyed_ensemble_graph - .emplace( - std::make_pair(input_map.second, EnsembleTensor(false))) - .first; - } - for (auto output : tensor_as_output) { - output->prev_nodes.push_back(&(it->second)); - it->second.next_nodes.push_back(output); - } - } - - step_idx++; - } - - return Status::Success; -} - -Status -ValidateEnsembleSchedulingConfig(const inference::ModelConfig& config) -{ - if (config.platform() != kEnsemblePlatform) { - return Status( - Status::Code::INVALID_ARG, - "ensemble scheduling cannot be set for model '" + config.name() + - "' whose platform is not " + kEnsemblePlatform); - } - if (config.instance_group().size() != 0) { - return Status( - Status::Code::INVALID_ARG, - "instance group should not be specified for ensemble '" + - config.name() + "'"); - } - if (config.has_optimization()) { - return Status( - Status::Code::INVALID_ARG, - "optimization should not be specified for ensemble '" + config.name() + - "'"); - } - if (config.model_warmup_size() != 0) { - return Status( - Status::Code::INVALID_ARG, - "model_warmup can not be specified for ensemble '" + config.name() + - "'"); - } - - // Make sure step is not empty and all fields are set - if (config.ensemble_scheduling().step_size() == 0) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'step' for ensemble '" + config.name() + "'"); - } - - std::unordered_map tensors; - - RETURN_IF_ERROR(BuildEnsembleGraph(config, tensors)); - - // check data flow - std::deque ready_queue; - for (const auto& 
input : config.input()) { - auto it = tensors.find(input.name()); - if (it == tensors.end()) { - return Status( - Status::Code::INVALID_ARG, "ensemble input '" + input.name() + - "' for ensemble " + config.name() + - "' is not used"); - } - it->second.ready = true; - ready_queue.push_back(&(it->second)); - } - while (!ready_queue.empty()) { - auto& ready_node = ready_queue.front(); - for (auto& next_node : ready_node->next_nodes) { - if (next_node->ready) { - continue; - } - bool next_node_ready = true; - for (auto& prev_node : next_node->prev_nodes) { - if (!prev_node->ready) { - next_node_ready = false; - break; - } - } - next_node->ready = next_node_ready; - if (next_node_ready) { - ready_queue.push_back(next_node); - } - } - ready_queue.pop_front(); - } - std::set outputs; - for (const auto& output : config.output()) { - auto it = tensors.find(output.name()); - if (it == tensors.end()) { - return Status( - Status::Code::INVALID_ARG, "ensemble output '" + output.name() + - "' for ensemble " + config.name() + - "' is not used"); - } - if (!it->second.ready) { - return Status( - Status::Code::INVALID_ARG, "output '" + output.name() + - "' for ensemble '" + config.name() + - "' is not written"); - } else { - outputs.insert(it->first); - } - } - // Check redundant ensemble tensors - for (const auto& tensor : tensors) { - // skip ensemble outputs as they have been checked and can have no - // next nodes - if (outputs.find(tensor.first) != outputs.end()) { - continue; - } - if (!tensor.second.ready || (tensor.second.next_nodes.size() == 0)) { - return Status( - Status::Code::INVALID_ARG, "ensemble tensor '" + tensor.first + - "' is unused in ensemble '" + - config.name() + "'"); - } - } - return Status::Success; -} - -#endif // TRITON_ENABLE_ENSEMBLE - -template -Status -ValidateIOShape( - const ModelIO& io, int32_t max_batch_size, - const std::string& message_prefix = "") -{ - if (io.name().empty()) { - return Status( - Status::Code::INVALID_ARG, message_prefix + "must specify 'name'"); - } - - if (io.data_type() == inference::DataType::TYPE_INVALID) { - return Status( - Status::Code::INVALID_ARG, "model output must specify 'data_type'"); - } - - if (io.dims_size() == 0) { - return Status( - Status::Code::INVALID_ARG, message_prefix + "must specify 'dims'"); - } - - // If the configuration is non-batching, then no input or output - // reshape can be empty as that would mean that input or output was - // always empty (no data). - if (io.has_reshape() && (io.reshape().shape_size() == 0) && - (max_batch_size == 0)) { - return Status( - Status::Code::INVALID_ARG, - message_prefix + - "cannot have empty reshape for non-batching model as scalar " - "tensors are not supported"); - } - - for (auto dim : io.dims()) { - // Dimension cannot be 0. - if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) { - return Status( - Status::Code::INVALID_ARG, - message_prefix + "dimension must be integer >= 1, or " + - std::to_string(triton::common::WILDCARD_DIM) + - " to indicate a variable-size dimension"); - } - } - - if (io.has_reshape()) { - // Zeros are not allowed in reshape. 
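// Self-contained sketch (helper name is illustrative, not from the Triton
// sources) of the wildcard-aware comparison performed below: split each shape
// at -1 wildcards and require the element products of corresponding chunks to
// match, e.g. {2, 4, -1, 6} -> {8, -1, 1, 6} passes because 2 * 4 == 8 and
// 6 == 1 * 6.
#include <cstdint>
#include <vector>

bool ReshapeChunksMatch(
    const std::vector<int64_t>& dims, const std::vector<int64_t>& reshape) {
  auto chunk_products = [](const std::vector<int64_t>& shape) {
    std::vector<int64_t> products;
    int64_t current = 1;
    for (int64_t dim : shape) {
      if (dim != -1) {
        current *= dim;
      } else {
        products.push_back(current);
        current = 1;
      }
    }
    products.push_back(current);
    return products;
  };
  return chunk_products(dims) == chunk_products(reshape);
}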
- for (auto dim : io.reshape().shape()) { - if ((dim < 1) && (dim != triton::common::WILDCARD_DIM)) { - return Status( - Status::Code::INVALID_ARG, - message_prefix + "reshape dimensions must be integer >= 1, or " + - std::to_string(triton::common::WILDCARD_DIM) + - " to indicate a variable-size dimension"); - } - } - - const int64_t dims_size = triton::common::GetElementCount(io.dims()); - const int64_t reshape_size = - triton::common::GetElementCount(io.reshape().shape()); - - // dims and reshape must both have same element count - // or both have variable-size dimension. - // Special case for empty reshape... expect dims to have element - // count of 1. - if ((dims_size != reshape_size) && - ((reshape_size != 0) || (dims_size != 1))) { - return Status( - Status::Code::INVALID_ARG, - message_prefix + "has different size for dims and reshape"); - } - - // shape contains variable-size dimension, in this case we compare if - // each pair of the trunks separated by variable-size dimension has - // the same element count. For instance, from [2, 4, -1, 6] to [8, -1, 1, 6] - // is valid reshape as 2 * 4 = 8 and 6 = 1 * 6. - if (dims_size == -1) { - std::vector dim_element_cnts; - std::vector reshape_element_cnts; - int64_t current_cnt = 1; - for (const auto& dim : io.dims()) { - if (dim != -1) { - current_cnt *= dim; - } else { - dim_element_cnts.push_back(current_cnt); - current_cnt = 1; - } - } - dim_element_cnts.push_back(current_cnt); - - current_cnt = 1; - for (const auto& dim : io.reshape().shape()) { - if (dim != -1) { - current_cnt *= dim; - } else { - reshape_element_cnts.push_back(current_cnt); - current_cnt = 1; - } - } - reshape_element_cnts.push_back(current_cnt); - - if (dim_element_cnts.size() != reshape_element_cnts.size()) { - return Status( - Status::Code::INVALID_ARG, - message_prefix + - "has different number of variable-size dimensions for dims " - "and reshape"); - } - for (size_t idx = 0; idx < dim_element_cnts.size(); idx++) { - if (dim_element_cnts[idx] != reshape_element_cnts[idx]) { - return Status( - Status::Code::INVALID_ARG, - message_prefix + "has different size for dims and reshape"); - } - } - } - } - - return Status::Success; -} - -} // namespace - -Status -GetModelVersionFromPath(const std::string& path, int64_t* version) -{ - auto version_dir = BaseName(path); - - // Determine the version from the last segment of 'path' - try { - *version = std::atoll(version_dir.c_str()); - } - catch (...) { - return Status( - Status::Code::INTERNAL, - "unable to determine model version from " + path); - } - - return Status::Success; -} - -Status -GetBooleanSequenceControlProperties( - const inference::ModelSequenceBatching& batcher, - const std::string& model_name, - const inference::ModelSequenceBatching::Control::Kind control_kind, - const bool required, std::string* tensor_name, - inference::DataType* tensor_datatype, float* fp32_false_value, - float* fp32_true_value, int32_t* int32_false_value, - int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value) -{ - // Make sure same tensor is not configured for multiple controls - std::set seen_tensors; - - // Make sure the control kind is not mentioned multiple times. 
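The wildcard branch of ValidateIOShape above splits dims and reshape into the fixed-size chunks that sit between -1 dimensions and compares their element counts, so reshaping [2, 4, -1, 6] to [8, -1, 1, 6] is accepted because 2 * 4 == 8 and 6 == 1 * 6. Here is a self-contained sketch of that check, with kWildcardDim standing in for triton::common::WILDCARD_DIM.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

constexpr int64_t kWildcardDim = -1;  // stand-in for triton::common::WILDCARD_DIM

// Split a shape into the element counts of the chunks separated by
// variable-size dimensions, e.g. {2, 4, -1, 6} -> {8, 6}.
std::vector<int64_t> ChunkElementCounts(const std::vector<int64_t>& shape) {
  std::vector<int64_t> counts;
  int64_t current = 1;
  for (int64_t dim : shape) {
    if (dim != kWildcardDim) {
      current *= dim;
    } else {
      counts.push_back(current);
      current = 1;
    }
  }
  counts.push_back(current);
  return counts;
}

// A reshape between shapes containing wildcards is compatible when each pair
// of fixed-size chunks on either side of the wildcards has the same element
// count, mirroring the chunk comparison in the deleted ValidateIOShape().
bool ReshapeCompatible(const std::vector<int64_t>& dims,
                       const std::vector<int64_t>& reshape) {
  return ChunkElementCounts(dims) == ChunkElementCounts(reshape);
}

int main() {
  std::cout << ReshapeCompatible({2, 4, -1, 6}, {8, -1, 1, 6}) << "\n";  // 1: 2*4 == 8, 6 == 1*6
  std::cout << ReshapeCompatible({2, 4, -1, 6}, {8, -1, 5}) << "\n";     // 0: 6 != 5
}
```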
- bool seen_control = false; - - for (const auto& control_input : batcher.control_input()) { - if (control_input.name().empty()) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control tensor must have a name for " + - model_name); - } - - if (seen_tensors.find(control_input.name()) != seen_tensors.end()) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control tensor '" + control_input.name() + - "' is specified for multiple control kinds for " + model_name); - } - - seen_tensors.insert(control_input.name()); - - for (const auto& c : control_input.control()) { - if (c.kind() == control_kind) { - if (seen_control) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching specifies multiple " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " tensors for " + model_name); - } - - *tensor_name = control_input.name(); - seen_control = true; - - // Make sure only one of int, float, or bool type is specified. - if (!((c.int32_false_true_size() != 0) || - (c.fp32_false_true_size() != 0) || - (c.bool_false_true_size() != 0))) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching must specify either 'int32_false_true', " - "'fp32_false_true' or 'bool_false_true' for " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " for " + model_name); - } else if ( - ((c.int32_false_true_size() != 0) && - (c.fp32_false_true_size() != 0)) || - ((c.int32_false_true_size() != 0) && - (c.bool_false_true_size() != 0)) || - ((c.fp32_false_true_size() != 0) && - (c.bool_false_true_size() != 0))) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching specifies more than one from " - "'int32_false_true', 'fp32_false_true' and 'bool_false_true' " - "for " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " for " + model_name); - } - - if (c.int32_false_true_size() > 0) { - if (c.int32_false_true_size() != 2) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control 'int32_false_true' must have " - "exactly 2 entries for " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " for " + model_name); - } - - if (tensor_datatype != nullptr) { - *tensor_datatype = inference::DataType::TYPE_INT32; - } - if (int32_false_value != nullptr) { - *int32_false_value = c.int32_false_true(0); - } - if (int32_true_value != nullptr) { - *int32_true_value = c.int32_false_true(1); - } - } else if (c.fp32_false_true_size() > 0) { - if (c.fp32_false_true_size() != 2) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control 'fp32_false_true' must have exactly " - "2 entries for " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " for " + model_name); - } - - if (tensor_datatype != nullptr) { - *tensor_datatype = inference::DataType::TYPE_FP32; - } - if (fp32_false_value != nullptr) { - *fp32_false_value = c.fp32_false_true(0); - } - if (fp32_true_value != nullptr) { - *fp32_true_value = c.fp32_false_true(1); - } - } else { - if (c.bool_false_true_size() != 2) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control 'bool_false_true' must have exactly " - "2 entries for " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " for " + model_name); - } - - if (tensor_datatype != nullptr) { - *tensor_datatype = inference::DataType::TYPE_BOOL; - } - if (bool_false_value != nullptr) { - *bool_false_value = c.bool_false_true(0); - } - if (bool_true_value != 
nullptr) { - *bool_true_value = c.bool_false_true(1); - } - } - } - } - } - - if (!seen_control) { - if (required) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control tensor must specify a " + - inference::ModelSequenceBatching_Control_Kind_Name(control_kind) + - " value for " + model_name); - } - - tensor_name->clear(); - } - - return Status::Success; -} - -Status -GetTypedSequenceControlProperties( - const inference::ModelSequenceBatching& batcher, - const std::string& model_name, - const inference::ModelSequenceBatching::Control::Kind control_kind, - const bool required, std::string* tensor_name, - inference::DataType* tensor_datatype) -{ - // Make sure same tensor is not configured for multiple controls - std::set seen_tensors; - - // Make sure the control kind is not mentioned multiple times. - bool seen_control = false; - - for (const auto& control_input : batcher.control_input()) { - if (control_input.name().empty()) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control tensor must have a name for " + - model_name); - } - - if (seen_tensors.find(control_input.name()) != seen_tensors.end()) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control tensor '" + control_input.name() + - "' is specified for multiple control kinds for " + model_name); - } - - seen_tensors.insert(control_input.name()); - - for (const auto& c : control_input.control()) { - if (c.kind() == control_kind) { - if (seen_control) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching specifies multiple " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " tensors for " + model_name); - } - - *tensor_name = control_input.name(); - if (tensor_datatype != nullptr) { - *tensor_datatype = c.data_type(); - } - - seen_control = true; - - if ((c.int32_false_true_size() > 0) || (c.fp32_false_true_size() > 0) || - (c.bool_false_true_size() > 0)) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching must not specify either 'int32_false_true', " - "'fp32_false_true' or 'bool_false_true' for " + - inference::ModelSequenceBatching_Control_Kind_Name( - control_kind) + - " for " + model_name); - } - } - } - } - - if (!seen_control) { - if (required) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching control tensor must specify a " + - inference::ModelSequenceBatching_Control_Kind_Name(control_kind) + - " value for " + model_name); - } - - tensor_name->clear(); - } - - return Status::Success; -} - -Status -GetNormalizedModelConfig( - const std::string& model_name, const std::string& path, - const double min_compute_capability, inference::ModelConfig* config) -{ - // Server-side autofill only sets certain backend fields for the models that - // belong to limited backends for backwards-compatibility. See TensorRT - // backend, ONNX Runtime backend, OpenVINO backend, TensorFLow backend, and - // PyTorch backend. - // Extracting detailed information is delegated to the backend implementation - // to auto-complete. - RETURN_IF_ERROR( - AutoCompleteBackendFields(model_name, std::string(path), config)); - LOG_VERBOSE(1) << "Server side auto-completed config: " - << config->DebugString(); - - RETURN_IF_ERROR(NormalizeModelConfig(min_compute_capability, config)); - - return Status::Success; -} - -Status -NormalizeModelConfig( - const double min_compute_capability, inference::ModelConfig* config) -{ - // If version_policy is not specified, default to Latest 1 version. 
- if (!config->has_version_policy()) { - inference::ModelVersionPolicy::Latest latest; - latest.set_num_versions(1); - config->mutable_version_policy()->mutable_latest()->CopyFrom(latest); - } - - // If dynamic batching is specified... - if (config->has_dynamic_batching()) { - // If preferred batch size is not specified set it to - // max-batch-size. - if (config->dynamic_batching().preferred_batch_size().size() == 0) { - auto mutable_preferred_batch_size = - config->mutable_dynamic_batching()->mutable_preferred_batch_size(); - if (config->max_batch_size() > 0) { - mutable_preferred_batch_size->Add(config->max_batch_size()); - } - } - } - - // If sequence batching is specified... - if (config->has_sequence_batching()) { - // Set default idle is not specified. - if (config->sequence_batching().max_sequence_idle_microseconds() == 0) { - config->mutable_sequence_batching()->set_max_sequence_idle_microseconds( - SEQUENCE_IDLE_DEFAULT_MICROSECONDS); - } - - if (config->sequence_batching().has_oldest()) { - // If preferred batch size is not specified set it to - // max-batch-size. - if (config->sequence_batching().oldest().preferred_batch_size().size() == - 0) { - auto mutable_preferred_batch_size = - config->mutable_sequence_batching() - ->mutable_oldest() - ->mutable_preferred_batch_size(); - if (config->max_batch_size() > 0) { - mutable_preferred_batch_size->Add(config->max_batch_size()); - } - } - } - } - - // If model ensembling is specified, don't attempt to normalize instance_group - // as it is not allowed in ensemble scheduling - if (!config->has_ensemble_scheduling()) { - auto optimization = config->mutable_optimization(); - if (!optimization->has_input_pinned_memory()) { - optimization->mutable_input_pinned_memory()->set_enable(true); - } - if (!optimization->has_output_pinned_memory()) { - optimization->mutable_output_pinned_memory()->set_enable(true); - } - } - - return Status::Success; -} - -Status -NormalizeInstanceGroup( - const double min_compute_capability, - const std::vector& preferred_groups, - inference::ModelConfig* config) -{ - // Instance group setting doesn't apply to ensemble - if (config->has_ensemble_scheduling()) { - return Status::Success; - } - - // Creates a set of supported GPU device ids - std::set supported_gpus; -#ifdef TRITON_ENABLE_GPU - // Get the total number of GPUs from the runtime library. - Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability); - if (!status.IsOk()) { - return status; - } - -#endif // TRITON_ENABLE_GPU - - // Make sure there is at least one instance_group. - if (config->instance_group().empty()) { - inference::ModelInstanceGroup* group = config->add_instance_group(); - group->set_name(config->name()); - - for (const auto& pg : preferred_groups) { - group->set_kind(pg.kind()); - group->set_count(pg.count()); - // handle preferred GPU setting differently based on kind - if (pg.kind() == inference::ModelInstanceGroup::KIND_GPU) { - // Don't use preferred group with KIND_GPU if there is no GPU. 
- if (supported_gpus.empty()) { - continue; - } - // If preferred group sets GPUs, limit deployment onto those that - // are also listed in supported gpus - if (!pg.gpus().empty()) { - for (const int32_t gid : pg.gpus()) { - if (supported_gpus.find(gid) != supported_gpus.end()) { - group->add_gpus(gid); - } - } - } - break; - } else if (pg.kind() == inference::ModelInstanceGroup::KIND_AUTO) { - // if AUTO, then set preferred GPU as is, to align with KIND_AUTO - // deduction specified below - for (const int32_t gid : pg.gpus()) { - group->add_gpus(gid); - } - break; - } - // Other kind should not set GPUs - break; - } - } - - // Assign default name, kind and count to each instance group that - // doesn't give those values explicitly. For KIND_GPU, set GPUs to - // all available if not specified explicitly. - size_t cnt = 0; - for (auto& group : *config->mutable_instance_group()) { - // Name - if (group.name().empty()) { - group.set_name(config->name() + "_" + std::to_string(cnt)); - } - cnt++; - - // For KIND_AUTO... if there are no GPUs or if any of the listed - // 'gpu's are not present, then use KIND_CPU. - if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) { - if (supported_gpus.empty()) { - group.set_kind(inference::ModelInstanceGroup::KIND_CPU); - } else { - for (const int32_t gid : group.gpus()) { - if (supported_gpus.find(gid) == supported_gpus.end()) { - group.set_kind(inference::ModelInstanceGroup::KIND_CPU); - break; - } - } - } - - if (group.kind() == inference::ModelInstanceGroup::KIND_AUTO) { - group.set_kind(inference::ModelInstanceGroup::KIND_GPU); - } - } - - // KIND is resolved at this point - for (const auto& pg : preferred_groups) { - if (group.kind() != pg.kind()) { - continue; - } - - // Limit the GPU setting within what is specified in the preferred group, - // if no available GPU then skip to next preferred group - if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) && - group.gpus().empty() && !pg.gpus().empty()) { - for (const int32_t gid : pg.gpus()) { - if (supported_gpus.find(gid) != supported_gpus.end()) { - group.add_gpus(gid); - } - } - if (group.gpus().empty()) { - continue; - } - } - if ((group.count() < 1) && (pg.count() > 0)) { - group.set_count(pg.count()); - } - } - - // Set Triton default if the fields are not set from preferred group - // Count - if (group.count() < 1) { - RETURN_IF_ERROR(SetDefaultInstanceCount(&group, config->backend())); - } - - // GPUs - if ((group.kind() == inference::ModelInstanceGroup::KIND_GPU) && - (group.gpus().size() == 0)) { - for (auto d : supported_gpus) { - group.add_gpus(d); - } - } - } - - return Status::Success; -} - -Status -LocalizePythonBackendExecutionEnvironmentPath( - const std::string& model_path, inference::ModelConfig* config, - std::shared_ptr* localized_model_dir) -{ - if (config->backend() == "python") { - if (config->parameters().contains("EXECUTION_ENV_PATH")) { - // Read EXECUTION_ENV_PATH - std::string exec_env_path = - config->parameters().at("EXECUTION_ENV_PATH").string_value(); - // Replace model directory variable with model_path - std::string model_dir_var = "$$TRITON_MODEL_DIRECTORY"; - if (exec_env_path.substr(0, model_dir_var.size()) == model_dir_var) { - exec_env_path.replace(0, model_dir_var.size(), model_path); - } - // Collapse any .. 
in the path - std::string abs_exec_env_path; - std::size_t prev_pos = exec_env_path.size(); - std::size_t pos = exec_env_path.find_last_of('/', prev_pos - 1); - int skip = 0; - while (pos != std::string::npos && prev_pos > 0) { - if (!skip) { - abs_exec_env_path = - exec_env_path.substr(pos, prev_pos - pos) + abs_exec_env_path; - } - skip = skip > 0 ? skip - 1 : skip; - if (pos >= 3 && exec_env_path.substr(pos - 3, 3) == "/..") { - skip += 2; - } - prev_pos = pos; - pos = exec_env_path.find_last_of('/', prev_pos - 1); - } - abs_exec_env_path = exec_env_path.substr(0, prev_pos) + abs_exec_env_path; - // Localize iff abs_exec_env_path is outside the model directory - std::string model_path_slash = - model_path.back() == '/' ? model_path : model_path + "/"; - if (abs_exec_env_path.substr(0, model_path_slash.size()) != - model_path_slash) { - // Localize the file - std::shared_ptr localized_exec_env_path; - RETURN_IF_ERROR( - LocalizePath(abs_exec_env_path, &localized_exec_env_path)); - // Persist the localized temporary path - (*localized_model_dir) - ->other_localized_path.push_back(localized_exec_env_path); - // Rewrite EXECUTION_ENV_PATH - config->mutable_parameters() - ->at("EXECUTION_ENV_PATH") - .set_string_value(localized_exec_env_path->Path()); - } - } - } - return Status::Success; -} - -Status -SetDefaultInstanceCount( - inference::ModelInstanceGroup* group, const std::string& backend) -{ - group->set_count(1); - - // Backends opt into the default_cpu_instance_count since - // some backends (pytorch, OpenVINO) don't perform well/have high overhead - // when using multiple instances. - const int default_cpu_instance_count = 2; - bool use_default_cpu_instance_count = - (backend == kTensorFlowBackend) || (backend == kOnnxRuntimeBackend); - if (group->kind() == inference::ModelInstanceGroup::KIND_CPU && - use_default_cpu_instance_count) { - group->set_count(default_cpu_instance_count); - } - - return Status::Success; -} - -Status -AutoCompleteBackendFields( - const std::string& model_name, const std::string& model_path, - inference::ModelConfig* config) -{ - std::set version_dirs; - RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &version_dirs)); - - // There must be at least one version directory that we can inspect to - // attempt to determine the platform. If not, we skip autofill with file name. - // For now we allow multiple versions and only inspect the first verison - // directory to ensure it is valid. We can add more aggressive checks later. - const bool has_version = (version_dirs.size() != 0); - const auto version_path = - has_version ? JoinPath({model_path, *(version_dirs.begin())}) : ""; - std::set version_dir_content; - if (has_version) { - RETURN_IF_ERROR(GetDirectoryContents(version_path, &version_dir_content)); - } - - // If the model name is not given in the configuration, set if based - // on the model path. - if (config->name().empty()) { - config->set_name(model_name); - } - - // Trying to fill the 'backend', 'default_model_filename' field. 
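LocalizePythonBackendExecutionEnvironmentPath above collapses any ".." segments in EXECUTION_ENV_PATH using a reverse scan with a skip counter, before deciding whether the resolved path lies outside the model directory and therefore needs to be localized. The sketch below performs roughly the same normalization with a forward scan and a segment stack, which is easier to follow; the paths are hypothetical and used only for illustration.

```cpp
#include <iostream>
#include <string>
#include <vector>

// Collapse ".." (and ".") segments in an absolute, '/'-separated path,
// e.g. "/models/add_sub/1/../env.tar.gz" -> "/models/add_sub/env.tar.gz".
std::string CollapseDotDot(const std::string& path) {
  std::vector<std::string> kept;
  std::string segment;
  for (size_t i = 0; i <= path.size(); ++i) {
    if (i == path.size() || path[i] == '/') {
      if (segment == "..") {
        if (!kept.empty()) kept.pop_back();  // ".." cancels the previous segment
      } else if (!segment.empty() && segment != ".") {
        kept.push_back(segment);
      }
      segment.clear();
    } else {
      segment += path[i];
    }
  }
  std::string out;
  for (const auto& s : kept) out += "/" + s;
  return out.empty() ? "/" : out;
}

int main() {
  // Hypothetical execution environment path, for illustration only.
  std::cout << CollapseDotDot("/models/add_sub/1/../env.tar.gz") << "\n";
  // -> /models/add_sub/env.tar.gz
}
```

Once the path is in this canonical form, a simple prefix comparison against the model directory (with a trailing '/') is enough to tell whether the environment archive lives outside the repository and must be copied to a local temporary location.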
- - // TensorFlow - // For TF backend, the platform is required - if (config->platform().empty()) { - // Check 'backend', 'default_model_filename', and the actual directory - // to determine the platform - if (config->backend().empty() || - (config->backend() == kTensorFlowBackend)) { - if (config->default_model_filename() == kTensorFlowSavedModelFilename) { - config->set_platform(kTensorFlowSavedModelPlatform); - } else if ( - config->default_model_filename() == kTensorFlowGraphDefFilename) { - config->set_platform(kTensorFlowGraphDefPlatform); - } else if (config->default_model_filename().empty() && has_version) { - bool is_dir = false; - if (version_dir_content.find(kTensorFlowSavedModelFilename) != - version_dir_content.end()) { - RETURN_IF_ERROR(IsDirectory( - JoinPath({version_path, kTensorFlowSavedModelFilename}), - &is_dir)); - if (is_dir) { - config->set_platform(kTensorFlowSavedModelPlatform); - } - } - if (version_dir_content.find(kTensorFlowGraphDefFilename) != - version_dir_content.end()) { - RETURN_IF_ERROR(IsDirectory( - JoinPath({version_path, kTensorFlowGraphDefFilename}), &is_dir)); - if (!is_dir) { - config->set_platform(kTensorFlowGraphDefPlatform); - } - } - } - } - } - - // Fill 'backend' and 'default_model_filename' if missing - if ((config->platform() == kTensorFlowSavedModelPlatform) || - (config->platform() == kTensorFlowGraphDefPlatform)) { - if (config->backend().empty()) { - config->set_backend(kTensorFlowBackend); - } - if (config->default_model_filename().empty()) { - if (config->platform() == kTensorFlowSavedModelPlatform) { - config->set_default_model_filename(kTensorFlowSavedModelFilename); - } else { - config->set_default_model_filename(kTensorFlowGraphDefFilename); - } - } - return Status::Success; - } - - // TensorRT - if (config->backend().empty()) { - if ((config->platform() == kTensorRTPlanPlatform) || - (config->default_model_filename() == kTensorRTPlanFilename)) { - config->set_backend(kTensorRTBackend); - } else if ( - config->platform().empty() && - config->default_model_filename().empty() && has_version) { - bool is_dir = false; - if (version_dir_content.find(kTensorRTPlanFilename) != - version_dir_content.end()) { - RETURN_IF_ERROR(IsDirectory( - JoinPath({version_path, kTensorRTPlanFilename}), &is_dir)); - if (!is_dir) { - config->set_backend(kTensorRTBackend); - } - } - } - } - if (config->backend() == kTensorRTBackend) { - if (config->platform().empty()) { - config->set_platform(kTensorRTPlanPlatform); - } - if (config->default_model_filename().empty()) { - config->set_default_model_filename(kTensorRTPlanFilename); - } - return Status::Success; - } - - // ONNXRuntime - if (config->backend().empty()) { - if ((config->platform() == kOnnxRuntimeOnnxPlatform) || - (config->default_model_filename() == kOnnxRuntimeOnnxFilename)) { - config->set_backend(kOnnxRuntimeBackend); - } else if ( - config->platform().empty() && - config->default_model_filename().empty() && has_version) { - if (version_dir_content.find(kOnnxRuntimeOnnxFilename) != - version_dir_content.end()) { - // ONNX model can be a file or a directory in the case of large model - config->set_backend(kOnnxRuntimeBackend); - } - } - } - if (config->backend() == kOnnxRuntimeBackend) { - if (config->platform().empty()) { - config->set_platform(kOnnxRuntimeOnnxPlatform); - } - if (config->default_model_filename().empty()) { - config->set_default_model_filename(kOnnxRuntimeOnnxFilename); - } - return Status::Success; - } - - // OpenVINO - if (config->backend().empty()) { - if 
(config->default_model_filename() == kOpenVINORuntimeOpenVINOFilename) { - config->set_backend(kOpenVINORuntimeBackend); - } else if ( - config->platform().empty() && - config->default_model_filename().empty() && has_version) { - if (version_dir_content.find(kOpenVINORuntimeOpenVINOFilename) != - version_dir_content.end()) { - config->set_backend(kOpenVINORuntimeBackend); - } - } - } - if (config->backend() == kOpenVINORuntimeBackend) { - if (config->default_model_filename().empty()) { - config->set_default_model_filename(kOpenVINORuntimeOpenVINOFilename); - } - return Status::Success; - } - - // PyTorch (TorchScript, LibTorch) - if (config->backend().empty()) { - if ((config->platform() == kPyTorchLibTorchPlatform) || - (config->default_model_filename() == kPyTorchLibTorchFilename)) { - config->set_backend(kPyTorchBackend); - } else if ( - config->platform().empty() && - config->default_model_filename().empty() && has_version) { - bool is_dir = false; - if (version_dir_content.find(kPyTorchLibTorchFilename) != - version_dir_content.end()) { - RETURN_IF_ERROR(IsDirectory( - JoinPath({version_path, kPyTorchLibTorchFilename}), &is_dir)); - if (!is_dir) { - config->set_backend(kPyTorchBackend); - } - } - } - } - if (config->backend() == kPyTorchBackend) { - if (config->platform().empty()) { - config->set_platform(kPyTorchLibTorchPlatform); - } - if (config->default_model_filename().empty()) { - config->set_default_model_filename(kPyTorchLibTorchFilename); - } - return Status::Success; - } - - // Python - if (config->backend().empty()) { - if (config->default_model_filename() == kPythonFilename) { - config->set_backend(kPythonBackend); - } else if ( - config->platform().empty() && - config->default_model_filename().empty() && has_version) { - if (version_dir_content.find(kPythonFilename) != - version_dir_content.end()) { - config->set_backend(kPythonBackend); - } - } - } - if (config->backend() == kPythonBackend) { - if (config->default_model_filename().empty()) { - config->set_default_model_filename(kPythonFilename); - } - return Status::Success; - } - - // Custom Backend - // For now, only do the narrowest case, where no info is given in the config. - if (config->backend().empty() && config->platform().empty() && - config->default_model_filename().empty()) { - LOG_VERBOSE(1) << "Could not infer supported backend, so attempting " - "autofill of custom backend."; - // Since we lazily load the backends, we let the model tell us what backend - // to load. We must assume that if the model name conforms to the required - // shape, we parse the backend name out of the model file name. i.e. - // model.identity will set the backend to "identity". - const std::string delimiter = "."; - size_t pos = model_name.find(delimiter, 0); - if (pos == std::string::npos) { - return Status( - triton::common::Error::Code::INVALID_ARG, - ("Invalid model name: Could not determine backend for model '" + - model_name + - "' with no backend in model configuration. 
Expected model name of " - "the form 'model.'.")); - } - const std::string backend_name = - model_name.substr(pos + 1, std::string::npos); - config->set_backend(backend_name); - config->set_default_model_filename( - (std::string("model.") + backend_name).c_str()); - return Status::Success; - } - - return Status::Success; -} - -Status -ValidateModelIOConfig(const inference::ModelConfig& config) -{ - Status status; - for (const auto& io : config.input()) { - status = ValidateModelInput(io, config.max_batch_size(), config.platform()); - if (!status.IsOk()) { - return Status( - status.StatusCode(), status.Message() + " for " + config.name()); - } - } - for (const auto& io : config.output()) { - status = - ValidateModelOutput(io, config.max_batch_size(), config.platform()); - if (!status.IsOk()) { - return Status( - status.StatusCode(), status.Message() + " for " + config.name()); - } - } - status = ValidateBatchIO(config); - if (!status.IsOk()) { - return Status( - status.StatusCode(), status.Message() + " for " + config.name()); - } - return Status::Success; -} - -Status -ValidateBatchIO(const inference::ModelConfig& config) -{ - std::set input_names; - std::set output_names; - for (const auto& io : config.input()) { - input_names.emplace(io.name()); - } - for (const auto& io : config.output()) { - output_names.emplace(io.name()); - } - for (const auto& batch_io : config.batch_input()) { - switch (batch_io.kind()) { - case inference::BatchInput::BATCH_ELEMENT_COUNT: - case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT: - case inference::BatchInput::BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO: - case inference::BatchInput::BATCH_MAX_ELEMENT_COUNT_AS_SHAPE: - case inference::BatchInput::BATCH_ITEM_SHAPE: - case inference::BatchInput::BATCH_ITEM_SHAPE_FLATTEN: { - if (batch_io.source_input_size() != 1) { - return Status( - Status::Code::INVALID_ARG, - "batch input kind '" + - inference::BatchInput::Kind_Name(batch_io.kind()) + - "' expects 1 source input, got " + - std::to_string(batch_io.source_input_size())); - } - break; - } - default: - return Status( - Status::Code::INVALID_ARG, - "unknown batch input kind '" + - inference::BatchInput::Kind_Name(batch_io.kind()) + "'"); - } - if ((batch_io.data_type() != inference::DataType::TYPE_INT32) && - (batch_io.data_type() != inference::DataType::TYPE_FP32)) { - return Status( - Status::Code::INVALID_ARG, - "batch input data type must be TYPE_INT32 or TYPE_FP32"); - } - for (const auto& source_name : batch_io.source_input()) { - if (input_names.find(source_name) == input_names.end()) { - return Status( - Status::Code::INVALID_ARG, - "unknown source input name '" + source_name + "'"); - } - } - } - - for (const auto& batch_io : config.batch_output()) { - switch (batch_io.kind()) { - case inference::BatchOutput::BATCH_SCATTER_WITH_INPUT_SHAPE: { - if (batch_io.source_input_size() != 1) { - return Status( - Status::Code::INVALID_ARG, - "batch output kind '" + - inference::BatchOutput::Kind_Name(batch_io.kind()) + - "' expects 1 source input, got " + - std::to_string(batch_io.source_input_size())); - } - break; - } - default: - return Status( - Status::Code::INVALID_ARG, - "unknown batch output kind '" + - inference::BatchOutput::Kind_Name(batch_io.kind()) + "'"); - } - for (const auto& source_name : batch_io.source_input()) { - if (input_names.find(source_name) == input_names.end()) { - return Status( - Status::Code::INVALID_ARG, - "unknown source input name '" + source_name + "'"); - } - } - std::set target_names; - for (const auto& target_name 
: batch_io.target_name()) { - if (output_names.find(target_name) == output_names.end()) { - return Status( - Status::Code::INVALID_ARG, - "unknown target output name '" + target_name + "'"); - } - if (target_names.emplace(target_name).second == false) { - return Status( - Status::Code::INVALID_ARG, "target output name '" + target_name + - "' can only be specified once"); - } - } - } - return Status::Success; -} - -Status -ValidateModelConfig( - const inference::ModelConfig& config, const double min_compute_capability) -{ - if (config.name().empty()) { - return Status( - Status::Code::INVALID_ARG, "model configuration must specify 'name'"); - } - - if (config.backend().empty()) { - // Expect backend is not empty unless it is ensemble platform. -#ifdef TRITON_ENABLE_ENSEMBLE - if (config.platform() != kEnsemblePlatform) -#endif // TRITON_ENABLE_ENSEMBLE - return Status( - Status::Code::INVALID_ARG, "unexpected platform type '" + - config.platform() + "' for " + - config.name()); - } -#ifdef TRITON_ENABLE_ENSEMBLE - else if (config.platform() == kEnsemblePlatform) { - return Status( - Status::Code::INVALID_ARG, - "Ensemble model '" + config.name() + "' must have platform type '" + - config.platform() + "' and empty backend type"); - } -#endif // TRITON_ENABLE_ENSEMBLE - - if (config.platform().empty() && config.backend().empty()) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'platform' or 'backend' for '" + config.name() + "'"); - } - - // Ensure both platform and backend are referring to known backend, - // or both referring to unknown backend for user-provided backend. - if (GetBackendTypeFromPlatform(config.platform()) != - GetBackendType(config.backend())) { - return Status( - Status::Code::INVALID_ARG, - "unexpected 'platform' and 'backend' pair, got:" + config.platform() + - ", " + config.backend()); - } - - if (config.max_batch_size() < 0) { - return Status( - Status::Code::INVALID_ARG, - "'max_batch_size' must be non-negative value for " + config.name()); - } - - if (!config.has_version_policy()) { - return Status( - Status::Code::INVALID_ARG, - "must specify 'version policy' for " + config.name()); - } - - // If dynamic batching is specified make sure the preferred batch - // sizes are positive and don't exceed maximum batch size. 
- if (config.has_dynamic_batching()) { - for (const auto size : config.dynamic_batching().preferred_batch_size()) { - if (size <= 0) { - return Status( - Status::Code::INVALID_ARG, - "dynamic batching preferred size must be positive for " + - config.name()); - } - if (size > config.max_batch_size()) { - return Status( - Status::Code::INVALID_ARG, - "dynamic batching preferred size must be <= max batch size for " + - config.name()); - } - } - - // Priority queue is specified - const auto priority_levels = config.dynamic_batching().priority_levels(); - if (priority_levels != 0) { - if ((config.dynamic_batching().default_priority_level() == 0) || - (config.dynamic_batching().default_priority_level() > - priority_levels)) { - return Status( - Status::Code::INVALID_ARG, - "default priority level must be in range [1, " + - std::to_string(priority_levels) + "] for " + config.name()); - } - for (const auto& queue_policy : - config.dynamic_batching().priority_queue_policy()) { - if ((queue_policy.first == 0) || - (queue_policy.first > priority_levels)) { - return Status( - Status::Code::INVALID_ARG, - "priority queue policy must have priority level in range [1, " + - std::to_string(priority_levels) + "] for " + config.name()); - } - } - } - - // preserve ordering option will conflict with priorities and delay policy - if (config.dynamic_batching().preserve_ordering()) { - if (priority_levels > 1) { - return Status( - Status::Code::INVALID_ARG, - "Only one priority level is allowed when 'preserve_ordering' is " - "true for " + - config.name()); - } - const auto& default_policy = - config.dynamic_batching().default_queue_policy(); - if ((default_policy.default_timeout_microseconds() != 0) && - (default_policy.timeout_action() == - inference::ModelQueuePolicy::DELAY)) { - return Status( - Status::Code::INVALID_ARG, - "Queue policy can not have DELAY as timeout action when " - "'preserve_ordering' is true for " + - config.name()); - } - // Also need to check policy in 'priority_queue_policy' - // for single priority case - for (const auto& policy : - config.dynamic_batching().priority_queue_policy()) { - if ((policy.second.default_timeout_microseconds() != 0) && - (policy.second.timeout_action() == - inference::ModelQueuePolicy::DELAY)) { - return Status( - Status::Code::INVALID_ARG, - "Queue policy can not have DELAY as timeout action when " - "'preserve_ordering' is true for " + - config.name()); - } - } - } - } - - // If sequence batching is specified make sure the control is - // specified correctly. - if (config.has_sequence_batching()) { - const auto& batcher = config.sequence_batching(); - - // Check boolean controls... - std::string tensor_name; - RETURN_IF_ERROR(GetBooleanSequenceControlProperties( - batcher, config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_START, - false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr)); - RETURN_IF_ERROR(GetBooleanSequenceControlProperties( - batcher, config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_END, - false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr)); - RETURN_IF_ERROR(GetBooleanSequenceControlProperties( - batcher, config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_READY, - false /* required */, &tensor_name, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr)); - - // Check CORRID control and make sure it is one of the allowed types. 
- inference::DataType tensor_datatype; - RETURN_IF_ERROR(GetTypedSequenceControlProperties( - batcher, config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_CORRID, - false /* required */, &tensor_name, &tensor_datatype)); - if (!tensor_name.empty()) { - if ((tensor_datatype != inference::DataType::TYPE_UINT64) && - (tensor_datatype != inference::DataType::TYPE_INT64) && - (tensor_datatype != inference::DataType::TYPE_UINT32) && - (tensor_datatype != inference::DataType::TYPE_INT32) && - (tensor_datatype != inference::DataType::TYPE_STRING)) { - return Status( - Status::Code::INVALID_ARG, - "unexpected data type for control " + - inference::ModelSequenceBatching_Control_Kind_Name( - inference::ModelSequenceBatching::Control:: - CONTROL_SEQUENCE_CORRID) + - " for " + config.name() + - ". Allowed data types are TYPE_UINT64, TYPE_INT64, " - "TYPE_UINT32, " - "TYPE_INT32 and TYPE_STRING"); - } - } - - // If oldest-first strategy is enabled make sure the preferred - // batch sizes are positive and don't exceed maximum batch size. - if (config.sequence_batching().has_oldest()) { - for (const auto size : - config.sequence_batching().oldest().preferred_batch_size()) { - if (size <= 0) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching preferred batch size must be positive for " + - config.name()); - } - if (size > config.max_batch_size()) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching preferred batch size must be <= max batch " - "size for " + - config.name()); - } - } - } - - // If direct strategy is enabled make sure the minimum slot utilization is - // in range (0.0, 1.0] - if (config.sequence_batching().has_direct()) { - if ((config.sequence_batching().direct().minimum_slot_utilization() < - 0.0) || - (config.sequence_batching().direct().minimum_slot_utilization() > - 1.0)) { - return Status( - Status::Code::INVALID_ARG, - "sequence batching minimum slot utilization must be in range " - "(0.0, 1.0] for " + - config.name()); - } - } - } - - // If ensemble scheduling is specified, validate it. Otherwise, - // must validate platform and instance_group - if (config.has_ensemble_scheduling()) { -#ifdef TRITON_ENABLE_ENSEMBLE - RETURN_IF_ERROR(ValidateEnsembleSchedulingConfig(config)); -#else - return Status( - Status::Code::INVALID_ARG, "ensemble scheduling not supported"); -#endif // TRITON_ENABLE_ENSEMBLE - } -#ifdef TRITON_ENABLE_ENSEMBLE - else if (config.platform() == kEnsemblePlatform) { - return Status( - Status::Code::INVALID_ARG, - "ensemble scheduling must be set for ensemble " + config.name() + - " whose platform is " + kEnsemblePlatform); - } -#endif // TRITON_ENABLE_ENSEMBLE - - // FIXME: DLIS-3916 - Response Cache does not yet support decoupled models - if (config.model_transaction_policy().decoupled() && - config.response_cache().enable()) { - return Status( - Status::Code::INVALID_ARG, - "Response Cache does not currently support model " + config.name() + - " with 'decoupled' transaction policy. 
Please disable the response" - " cache."); - } - - return Status::Success; -} - -Status -ValidateInstanceGroup( - const inference::ModelConfig& config, const double min_compute_capability) -{ - // Instance group setting doesn't apply to ensemble - if (config.has_ensemble_scheduling()) { - return Status::Success; - } - - if (config.instance_group().size() == 0) { - return Status( - Status::Code::INVALID_ARG, - "must specify one or more 'instance group's for " + config.name()); - } - - // Make sure KIND_GPU instance group specifies at least one GPU and - // doesn't specify a non-existent GPU. Make sure non-KIND_GPU does - // not specify any GPUs. -#ifdef TRITON_ENABLE_GPU - std::set supported_gpus; - Status status = GetSupportedGPUs(&supported_gpus, min_compute_capability); - if (!status.IsOk()) { - return status; - } -#endif // TRITON_ENABLE_GPU - - for (const auto& group : config.instance_group()) { - if (group.kind() == inference::ModelInstanceGroup::KIND_MODEL) { - if (group.gpus().size() > 0) { - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " has kind KIND_MODEL but specifies one or more GPUs"); - } - } else if (group.kind() == inference::ModelInstanceGroup::KIND_GPU) { -#if !defined(TRITON_ENABLE_GPU) && !defined(TRITON_ENABLE_MALI_GPU) - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " has kind KIND_GPU but server does not support GPUs"); -#elif defined(TRITON_ENABLE_GPU) - if (group.gpus().size() == 0) { - if (supported_gpus.size() == 0) { - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " has kind KIND_GPU but no GPUs are available"); - } else { - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " has kind KIND_GPU but specifies no GPUs"); - } - } - - for (const int32_t gid : group.gpus()) { - if (supported_gpus.find(gid) == supported_gpus.end()) { - std::string supported_gpus_str; - for (const auto& cc : supported_gpus) { - if (!supported_gpus_str.empty()) { - supported_gpus_str += ", "; - } - supported_gpus_str += std::to_string(cc); - } - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " specifies invalid or unsupported gpu id " + - std::to_string(gid) + - ". GPUs with at least the minimum required CUDA compute " - "compatibility of " + - std::to_string(min_compute_capability) + - " are: " + supported_gpus_str); - } - } -#endif // ! TRITON_ENABLE_GPU && ! 
TRITON_ENABLE_MALI_GPU - } else if (group.kind() == inference::ModelInstanceGroup::KIND_CPU) { - if (group.gpus().size() > 0) { - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " has kind KIND_CPU but specifies one or more GPUs"); - } - } else { - return Status( - Status::Code::INTERNAL, "instance group " + group.name() + - " of model " + config.name() + - " has unexpected kind KIND_AUTO"); - } - - if ((config.platform() != kTensorRTPlanPlatform) && - !group.profile().empty()) { - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " and platform " + config.platform() + - "specifies profile field which is only supported for " - "TensorRT models"); - } else if (!group.profile().empty()) { - for (const auto& profile : group.profile()) { - int profile_index; - RETURN_IF_ERROR(GetProfileIndex(profile, &profile_index)); - if (profile_index < 0) { - return Status( - Status::Code::INVALID_ARG, - "instance group " + group.name() + " of model " + config.name() + - " and platform " + config.platform() + - " specifies invalid profile " + profile + - ". The field should contain the string representation of a " - "non-negative integer."); - } - } - } - } - return Status::Success; -} - -Status -ValidateModelInput( - const inference::ModelInput& io, int32_t max_batch_size, - const std::string& platform) -{ - RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model input ")); - - if (((io.format() == inference::ModelInput::FORMAT_NHWC) || - (io.format() == inference::ModelInput::FORMAT_NCHW)) && - (io.dims_size() != 3)) { - return Status( - Status::Code::INVALID_ARG, "model input NHWC/NCHW require 3 dims"); - } - - if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) { - return Status( - Status::Code::INVALID_ARG, - "shape tensors are only supported for TensorRT platform"); - } - - return Status::Success; -} - -Status -CheckAllowedModelInput( - const inference::ModelInput& io, const std::set& allowed) -{ - if (allowed.find(io.name()) == allowed.end()) { - std::string astr; - for (const auto& a : allowed) { - if (!astr.empty()) { - astr.append(", "); - } - astr.append(a); - } - - return Status( - Status::Code::INVALID_ARG, "unexpected inference input '" + io.name() + - "', allowed inputs are: " + astr); - } - return Status::Success; -} - -Status -ValidateModelOutput( - const inference::ModelOutput& io, int32_t max_batch_size, - const std::string& platform) -{ - RETURN_IF_ERROR(ValidateIOShape(io, max_batch_size, "model output ")); - - if ((platform != kTensorRTPlanPlatform) && io.is_shape_tensor()) { - return Status( - Status::Code::INVALID_ARG, - "shape tensors are only supported for TensorRT platform"); - } - - return Status::Success; -} - -Status -CheckAllowedModelOutput( - const inference::ModelOutput& io, const std::set& allowed) -{ - if (allowed.find(io.name()) == allowed.end()) { - std::string astr; - for (const auto& a : allowed) { - if (!astr.empty()) { - astr.append(", "); - } - astr.append(a); - } - - return Status( - Status::Code::INVALID_ARG, "unexpected inference output '" + io.name() + - "', allowed outputs are: " + astr); - } - - return Status::Success; -} - -Status -ParseBoolParameter( - const std::string& key, std::string value, bool* parsed_value) -{ - std::transform( - value.begin(), value.end(), value.begin(), - [](unsigned char c) { return std::tolower(c); }); - - if ((value == "true") || (value == "1")) { - *parsed_value = true; - } else if 
((value == "false") || (value == "0")) { - *parsed_value = false; - } else { - return Status( - Status::Code::INVALID_ARG, - "failed to convert " + key + " '" + value + "' to boolean value"); - } - - return Status::Success; -} - -Status -ParseLongLongParameter( - const std::string& key, const std::string& value, int64_t* parsed_value) -{ - try { - *parsed_value = std::stoll(value); - } - catch (const std::invalid_argument& ia) { - return Status( - Status::Code::INVALID_ARG, - "failed to convert " + key + " '" + value + "' to integral number"); - } - - return Status::Success; -} - -Status -GetProfileIndex(const std::string& profile_name, int* profile_index) -{ - if (profile_name.empty()) { - return Status(Status::Code::INVALID_ARG, "profile name must not be empty"); - } - - try { - *profile_index = stoi(profile_name); - } - catch (const std::invalid_argument& ia) { - return Status( - Status::Code::INVALID_ARG, - "unable to parse '" + profile_name + "': " + ia.what()); - } - - return Status::Success; -} - -namespace { - -Status -CollectInt64Fields( - google::protobuf::Message* message, const std::string& prefix, - std::set* int64_fields) -{ - const google::protobuf::Descriptor* desc = message->GetDescriptor(); - const google::protobuf::Reflection* refl = message->GetReflection(); - for (int i = 0; i < desc->field_count(); ++i) { - const google::protobuf::FieldDescriptor* field = desc->field(i); - const std::string fullname = prefix + "::" + field->name(); - switch (field->type()) { - case google::protobuf::FieldDescriptor::TYPE_MESSAGE: { - if (field->is_repeated()) { - int rsize = refl->FieldSize(*message, field); - if (rsize == 0) { - refl->AddMessage(message, field); - } - - rsize = refl->FieldSize(*message, field); - for (int r = 0; r < rsize; ++r) { - RETURN_IF_ERROR(CollectInt64Fields( - refl->MutableRepeatedMessage(message, field, r), fullname, - int64_fields)); - } - } else { - RETURN_IF_ERROR(CollectInt64Fields( - refl->MutableMessage(message, field), fullname, int64_fields)); - } - } break; - - case google::protobuf::FieldDescriptor::TYPE_INT64: - case google::protobuf::FieldDescriptor::TYPE_UINT64: - case google::protobuf::FieldDescriptor::TYPE_SINT64: - case google::protobuf::FieldDescriptor::TYPE_FIXED64: - case google::protobuf::FieldDescriptor::TYPE_SFIXED64: - int64_fields->insert(fullname); - break; - - default: - break; - } - } - - return Status::Success; -} - -Status -ValidateModelConfigInt64() -{ - // Must initialize a dummy ModelConfig so that all fields are - // visited. - inference::ModelConfig config; - - std::set int64_fields; - RETURN_IF_ERROR(CollectInt64Fields(&config, "ModelConfig", &int64_fields)); - - LOG_VERBOSE(1) << "ModelConfig 64-bit fields:"; - for (const auto& f : int64_fields) { - LOG_VERBOSE(1) << "\t" << f; - } - - // We expect to find exactly the following fields. If we get an - // error from this code ModelConfig has added or removed a 64-bit - // field and we need to adjust here and in ModelConfigToJson below. 
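ValidateModelConfigInt64 above walks every field of a dummy ModelConfig through protobuf reflection and collects the 64-bit integer fields, so that ModelConfigToJson cannot silently miss one when it converts string-encoded values back to numbers. The following is a minimal, compilable sketch of the same descriptor walk; it uses the well-known Timestamp message so it does not depend on Triton's generated ModelConfig, and unlike the deleted CollectInt64Fields it traverses the Descriptor directly (no message instance, no Reflection) and does not guard against recursive message types.

```cpp
#include <google/protobuf/descriptor.h>
#include <google/protobuf/timestamp.pb.h>

#include <iostream>
#include <set>
#include <string>

// Record the full names of all 64-bit integer fields reachable from 'desc',
// recursing into nested message fields. Assumes the message type is not
// self-referential.
void Collect64BitFields(const google::protobuf::Descriptor* desc,
                        const std::string& prefix,
                        std::set<std::string>* out) {
  for (int i = 0; i < desc->field_count(); ++i) {
    const auto* field = desc->field(i);
    const std::string fullname = prefix + "::" + std::string(field->name());
    switch (field->type()) {
      case google::protobuf::FieldDescriptor::TYPE_MESSAGE:
        Collect64BitFields(field->message_type(), fullname, out);
        break;
      case google::protobuf::FieldDescriptor::TYPE_INT64:
      case google::protobuf::FieldDescriptor::TYPE_UINT64:
      case google::protobuf::FieldDescriptor::TYPE_SINT64:
      case google::protobuf::FieldDescriptor::TYPE_FIXED64:
      case google::protobuf::FieldDescriptor::TYPE_SFIXED64:
        out->insert(fullname);
        break;
      default:
        break;
    }
  }
}

int main() {
  std::set<std::string> fields;
  Collect64BitFields(google::protobuf::Timestamp::descriptor(), "Timestamp", &fields);
  for (const auto& f : fields) std::cout << f << "\n";  // expect: Timestamp::seconds
}
```

Comparing the collected set against a hard-coded expected set, as the deleted code does, turns "someone added a new 64-bit field" into an immediate internal error instead of a subtly wrong JSON representation.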
- std::set expected{ - "ModelConfig::input::dims", - "ModelConfig::input::reshape::shape", - "ModelConfig::output::dims", - "ModelConfig::output::reshape::shape", - "ModelConfig::version_policy::specific::versions", - "ModelConfig::dynamic_batching::max_queue_delay_microseconds", - "ModelConfig::dynamic_batching::default_queue_policy::default_timeout_" - "microseconds", - "ModelConfig::dynamic_batching::priority_queue_policy::value::default_" - "timeout_microseconds", - "ModelConfig::sequence_batching::direct::max_queue_delay_microseconds", - "ModelConfig::sequence_batching::state::dims", - "ModelConfig::sequence_batching::state::initial_state::dims", - "ModelConfig::sequence_batching::oldest::max_queue_delay_microseconds", - "ModelConfig::sequence_batching::max_sequence_idle_microseconds", - "ModelConfig::ensemble_scheduling::step::model_version", - "ModelConfig::model_warmup::inputs::value::dims", - "ModelConfig::optimization::cuda::graph_spec::input::value::dim", - "ModelConfig::optimization::cuda::graph_spec::graph_lower_bound::input::" - "value::dim", - "ModelConfig::instance_group::secondary_devices::device_id"}; - - if (int64_fields != expected) { - return Status( - Status::Code::INTERNAL, "ModelConfig 64-bit field needs update"); - } - - return Status::Success; -} - -Status -FixInt( - triton::common::TritonJson::Value& document, - triton::common::TritonJson::Value& io, const std::string& name) -{ - triton::common::TritonJson::Value str_value; - if (!io.Find(name.c_str(), &str_value)) { - return Status::Success; - } - - std::string str; - RETURN_IF_ERROR(str_value.AsString(&str)); - - int64_t d; - try { - d = std::atoll(str.c_str()); - } - catch (...) { - return Status( - Status::Code::INTERNAL, - (std::string("unable to convert '") + str + "' to integer")); - } - - str_value.SetInt(d); - - return Status::Success; -} - -Status -FixIntArray( - triton::common::TritonJson::Value& document, - triton::common::TritonJson::Value& io, const std::string& name) -{ - triton::common::TritonJson::Value fixed_shape_array( - document, triton::common::TritonJson::ValueType::ARRAY); - - if (!io.Find(name.c_str())) { - return Status::Success; - } - - triton::common::TritonJson::Value shape_array; - RETURN_IF_ERROR(io.MemberAsArray(name.c_str(), &shape_array)); - for (size_t i = 0; i < shape_array.ArraySize(); ++i) { - std::string str; - RETURN_IF_ERROR(shape_array.IndexAsString(i, &str)); - - int64_t d; - try { - d = std::atoll(str.c_str()); - } - catch (...) { - return Status( - Status::Code::INTERNAL, - (std::string("unable to convert '") + str + "' to integer")); - } - - RETURN_IF_ERROR(fixed_shape_array.AppendInt(d)); - } - - shape_array.Swap(fixed_shape_array); - fixed_shape_array.Release(); - - return Status::Success; -} - -Status -FixObjectArray( - triton::common::TritonJson::Value& document, - triton::common::TritonJson::Value& arr, const std::string& name) -{ - for (size_t i = 0; i < arr.ArraySize(); ++i) { - triton::common::TritonJson::Value obj; - RETURN_IF_ERROR(arr.IndexAsObject(i, &obj)); - RETURN_IF_ERROR(FixInt(document, obj, name)); - } - - return Status::Success; -} - -} // namespace - -Status -ModelConfigToJson( - const inference::ModelConfig& config, const uint32_t config_version, - std::string* json_str) -{ - // Currently only support 'config_version' 1, which is the json - // representation of the ModelConfig protobuf with the int64 fields - // fixes to be actual numbers instead of the string madness done by - // protobuf. 
- if (config_version != 1) { - return Status( - Status::Code::INVALID_ARG, - std::string("model configuration version ") + - std::to_string(config_version) + - " not supported, supported versions are: 1"); - } - - // Config will have 0 byte size if all fields are with default value, - // in other word the config is empty. - if (config.ByteSizeLong() == 0) { - json_str->clear(); - return Status::Success; - } - - std::string config_json_str; - ::google::protobuf::util::JsonPrintOptions options; - options.preserve_proto_field_names = true; - options.always_print_primitive_fields = true; - ::google::protobuf::util::MessageToJsonString( - config, &config_json_str, options); - - // We need to verify that every field 64-bit field in the - // ModelConfig protobuf is being handled. We hardcode the known - // fields and check just once to make sure everything has been - // handled. We could have this check in a separately compiled CI - // test but it is convenient to keep it here close to the code below - // that actually fixes the 64-bit fields. - { - static std::once_flag fonce; - Status status = Status::Success; - std::call_once(fonce, [&status] { status = ValidateModelConfigInt64(); }); - RETURN_IF_ERROR(status); - } - - // In the json produced by protobuf, int64 and uint64 values are - // represented as strings. Protobuf doesn't provide an option to - // disable this (sigh) so we need to fix it up here as we want the - // json representation of the config to be reasonable json... - triton::common::TritonJson::Value config_json; - config_json.Parse(config_json_str); - - // Fix input::dims, input::reshape::shape, output::dims, - // output::reshape::shape - for (std::string name : {"input", "output"}) { - triton::common::TritonJson::Value ios; - RETURN_IF_ERROR(config_json.MemberAsArray(name.c_str(), &ios)); - for (size_t i = 0; i < ios.ArraySize(); ++i) { - triton::common::TritonJson::Value io; - RETURN_IF_ERROR(ios.IndexAsObject(i, &io)); - RETURN_IF_ERROR(FixIntArray(config_json, io, "dims")); - - triton::common::TritonJson::Value reshape; - if (io.Find("reshape", &reshape)) { - RETURN_IF_ERROR(FixIntArray(config_json, reshape, "shape")); - } - } - } - - // Fix version_policy::specific::versions - { - triton::common::TritonJson::Value vp; - if (config_json.Find("version_policy", &vp)) { - triton::common::TritonJson::Value specific; - if (vp.Find("specific", &specific)) { - RETURN_IF_ERROR(FixIntArray(config_json, specific, "versions")); - } - } - } - - // Fix dynamic_batching::max_queue_delay_microseconds, - // dynamic_batching::default_queue_policy::default_timeout_microseconds, - // dynamic_batching::priority_queue_policy::value::default_timeout_microseconds - { - triton::common::TritonJson::Value db; - if (config_json.Find("dynamic_batching", &db)) { - RETURN_IF_ERROR(FixInt(config_json, db, "max_queue_delay_microseconds")); - triton::common::TritonJson::Value dqp; - if (db.Find("default_queue_policy", &dqp)) { - RETURN_IF_ERROR( - FixInt(config_json, dqp, "default_timeout_microseconds")); - } - triton::common::TritonJson::Value pqp; - if (db.Find("priority_queue_policy", &pqp)) { - // Iterate over each member in 'pqp' and fix... 
- std::vector members; - RETURN_IF_ERROR(pqp.Members(&members)); - for (const auto& m : members) { - triton::common::TritonJson::Value el; - RETURN_IF_ERROR(pqp.MemberAsObject(m.c_str(), &el)); - RETURN_IF_ERROR( - FixInt(config_json, el, "default_timeout_microseconds")); - } - } - } - } - - // Fix sequence_batching::oldest::max_queue_delay_microseconds, - // sequence_batching::direct::max_queue_delay_microseconds, - // sequence_batching::max_sequence_idle_microseconds - { - triton::common::TritonJson::Value sb; - if (config_json.Find("sequence_batching", &sb)) { - RETURN_IF_ERROR( - FixInt(config_json, sb, "max_sequence_idle_microseconds")); - triton::common::TritonJson::Value oldest; - if (sb.Find("oldest", &oldest)) { - RETURN_IF_ERROR( - FixInt(config_json, oldest, "max_queue_delay_microseconds")); - } - triton::common::TritonJson::Value direct; - if (sb.Find("direct", &direct)) { - RETURN_IF_ERROR( - FixInt(config_json, direct, "max_queue_delay_microseconds")); - } - - triton::common::TritonJson::Value states; - if (sb.Find("state", &states)) { - for (size_t i = 0; i < states.ArraySize(); ++i) { - triton::common::TritonJson::Value state; - RETURN_IF_ERROR(states.IndexAsObject(i, &state)); - RETURN_IF_ERROR(FixIntArray(config_json, state, "dims")); - - triton::common::TritonJson::Value initial_state; - if (sb.Find("initial_state", &initial_state)) { - RETURN_IF_ERROR(FixIntArray(config_json, initial_state, "dims")); - } - } - } - } - } - - // Fix ensemble_scheduling::step::model_version. - { - triton::common::TritonJson::Value ens; - if (config_json.Find("ensemble_scheduling", &ens)) { - triton::common::TritonJson::Value step; - if (ens.Find("step", &step)) { - RETURN_IF_ERROR(FixObjectArray(config_json, step, "model_version")); - } - } - } - - // Fix model_warmup::inputs::value::dims. - { - triton::common::TritonJson::Value warmups; - if (config_json.Find("model_warmup", &warmups)) { - for (size_t i = 0; i < warmups.ArraySize(); ++i) { - triton::common::TritonJson::Value warmup; - RETURN_IF_ERROR(warmups.IndexAsObject(i, &warmup)); - triton::common::TritonJson::Value inputs; - if (warmup.Find("inputs", &inputs)) { - std::vector members; - RETURN_IF_ERROR(inputs.Members(&members)); - for (const auto& m : members) { - triton::common::TritonJson::Value input; - RETURN_IF_ERROR(inputs.MemberAsObject(m.c_str(), &input)); - RETURN_IF_ERROR(FixIntArray(config_json, input, "dims")); - } - } - } - } - } - - // Convert fixed json back the string... - triton::common::TritonJson::WriteBuffer buffer; - RETURN_IF_ERROR(config_json.Write(&buffer)); - *json_str = std::move(buffer.MutableContents()); - - return Status::Success; -} - -Status -JsonToModelConfig( - const std::string& json_config, const uint32_t config_version, - inference::ModelConfig* protobuf_config) -{ - // Currently only support 'config_version' 1, which is the json - // representation of the ModelConfig protobuf matches the representation in - // ModelConfigToJson(). 
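ModelConfigToJson above has to undo protobuf's JSON convention of printing 64-bit integer fields as strings, which is what the FixInt and FixIntArray helpers do with TritonJson. Below is a sketch of the same fix-up using nlohmann::json purely as a stand-in JSON library (the deleted code does not use it); the member names mirror typical ModelConfig fields but the inputs are illustrative.

```cpp
#include <nlohmann/json.hpp>

#include <iostream>
#include <string>

using json = nlohmann::json;

// Convert a string-encoded integer member back to a JSON number, e.g.
// "max_queue_delay_microseconds": "100" -> 100.
void FixInt(json& obj, const std::string& name) {
  if (obj.contains(name) && obj[name].is_string()) {
    obj[name] = std::stoll(obj[name].get<std::string>());
  }
}

// Same idea for an array member such as "dims": ["8", "-1"].
void FixIntArray(json& obj, const std::string& name) {
  if (!obj.contains(name) || !obj[name].is_array()) return;
  for (auto& el : obj[name]) {
    if (el.is_string()) el = std::stoll(el.get<std::string>());
  }
}

int main() {
  json io = json::parse(R"({"name":"INPUT0","dims":["8","-1"]})");
  json db = json::parse(R"({"max_queue_delay_microseconds":"100"})");
  FixIntArray(io, "dims");
  FixInt(db, "max_queue_delay_microseconds");
  std::cout << io.dump() << "\n" << db.dump() << "\n";
  // {"dims":[8,-1],"name":"INPUT0"}
  // {"max_queue_delay_microseconds":100}
}
```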
- if (config_version != 1) { - return Status( - Status::Code::INVALID_ARG, - std::string("model configuration version ") + - std::to_string(config_version) + - " not supported, supported versions are: 1"); - } - - ::google::protobuf::util::JsonParseOptions options; - options.case_insensitive_enum_parsing = true; - options.ignore_unknown_fields = false; - auto err = ::google::protobuf::util::JsonStringToMessage( - json_config, protobuf_config, options); - if (!err.ok()) { - return Status(Status::Code::INVALID_ARG, std::string(err.message())); - } - - return Status::Success; -} - -BackendType -GetBackendTypeFromPlatform(const std::string& platform_name) -{ - if ((platform_name == kTensorFlowGraphDefPlatform) || - (platform_name == kTensorFlowSavedModelPlatform)) { - return BackendType::BACKEND_TYPE_TENSORFLOW; - } - - if (platform_name == kTensorRTPlanPlatform) { - return BackendType::BACKEND_TYPE_TENSORRT; - } - - if (platform_name == kOnnxRuntimeOnnxPlatform) { - return BackendType::BACKEND_TYPE_ONNXRUNTIME; - } - - if (platform_name == kPyTorchLibTorchPlatform) { - return BackendType::BACKEND_TYPE_PYTORCH; - } - - return BackendType::BACKEND_TYPE_UNKNOWN; -} - -/// Get the BackendType value for a backend name. -/// \param backend_name The backend name. -/// \return The BackendType or BackendType::UNKNOWN if the platform string -/// is not recognized. -BackendType -GetBackendType(const std::string& backend_name) -{ - if (backend_name == kTensorFlowBackend) { - return BackendType::BACKEND_TYPE_TENSORFLOW; - } - - if (backend_name == kTensorRTBackend) { - return BackendType::BACKEND_TYPE_TENSORRT; - } - - if (backend_name == kOnnxRuntimeBackend) { - return BackendType::BACKEND_TYPE_ONNXRUNTIME; - } - - if (backend_name == kPyTorchBackend) { - return BackendType::BACKEND_TYPE_PYTORCH; - } - - return BackendType::BACKEND_TYPE_UNKNOWN; -} - -TRITONSERVER_DataType -DataTypeToTriton(const inference::DataType dtype) -{ - switch (dtype) { - case inference::DataType::TYPE_BOOL: - return TRITONSERVER_TYPE_BOOL; - case inference::DataType::TYPE_UINT8: - return TRITONSERVER_TYPE_UINT8; - case inference::DataType::TYPE_UINT16: - return TRITONSERVER_TYPE_UINT16; - case inference::DataType::TYPE_UINT32: - return TRITONSERVER_TYPE_UINT32; - case inference::DataType::TYPE_UINT64: - return TRITONSERVER_TYPE_UINT64; - case inference::DataType::TYPE_INT8: - return TRITONSERVER_TYPE_INT8; - case inference::DataType::TYPE_INT16: - return TRITONSERVER_TYPE_INT16; - case inference::DataType::TYPE_INT32: - return TRITONSERVER_TYPE_INT32; - case inference::DataType::TYPE_INT64: - return TRITONSERVER_TYPE_INT64; - case inference::DataType::TYPE_FP16: - return TRITONSERVER_TYPE_FP16; - case inference::DataType::TYPE_FP32: - return TRITONSERVER_TYPE_FP32; - case inference::DataType::TYPE_FP64: - return TRITONSERVER_TYPE_FP64; - case inference::DataType::TYPE_STRING: - return TRITONSERVER_TYPE_BYTES; - case inference::DataType::TYPE_BF16: - return TRITONSERVER_TYPE_BF16; - default: - break; - } - - return TRITONSERVER_TYPE_INVALID; -} - -inference::DataType -TritonToDataType(const TRITONSERVER_DataType dtype) -{ - switch (dtype) { - case TRITONSERVER_TYPE_BOOL: - return inference::DataType::TYPE_BOOL; - case TRITONSERVER_TYPE_UINT8: - return inference::DataType::TYPE_UINT8; - case TRITONSERVER_TYPE_UINT16: - return inference::DataType::TYPE_UINT16; - case TRITONSERVER_TYPE_UINT32: - return inference::DataType::TYPE_UINT32; - case TRITONSERVER_TYPE_UINT64: - return inference::DataType::TYPE_UINT64; - case 
TRITONSERVER_TYPE_INT8: - return inference::DataType::TYPE_INT8; - case TRITONSERVER_TYPE_INT16: - return inference::DataType::TYPE_INT16; - case TRITONSERVER_TYPE_INT32: - return inference::DataType::TYPE_INT32; - case TRITONSERVER_TYPE_INT64: - return inference::DataType::TYPE_INT64; - case TRITONSERVER_TYPE_FP16: - return inference::DataType::TYPE_FP16; - case TRITONSERVER_TYPE_FP32: - return inference::DataType::TYPE_FP32; - case TRITONSERVER_TYPE_FP64: - return inference::DataType::TYPE_FP64; - case TRITONSERVER_TYPE_BYTES: - return inference::DataType::TYPE_STRING; - case TRITONSERVER_TYPE_BF16: - return inference::DataType::TYPE_BF16; - default: - break; - } - - return inference::DataType::TYPE_INVALID; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_config_utils.h b/3rdparty/core-r22.12/src/model_config_utils.h deleted file mode 100644 index f514ef3c48dab55945ec0bf650797e8f49edb7f4..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_config_utils.h +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "model_config.pb.h" -#include "status.h" -#include "triton/common/model_config.h" -#include "tritonserver_apis.h" -#include "filesystem.h" - -namespace triton { namespace core { - -/// Enumeration for the different backend types. -enum BackendType { - BACKEND_TYPE_UNKNOWN = 0, - BACKEND_TYPE_TENSORRT = 1, - BACKEND_TYPE_TENSORFLOW = 2, - BACKEND_TYPE_ONNXRUNTIME = 3, - BACKEND_TYPE_PYTORCH = 4 -}; - -// Get version of a model from the path containing the model -/// definition file. -/// \param path The path to the model definition file. -/// \param version Returns the version. -/// \return The error status. -Status GetModelVersionFromPath(const std::string& path, int64_t* version); - -/// Get the tensor name, false value, and true value for a boolean -/// sequence batcher control kind. If 'required' is true then must -/// find a tensor for the control. 
If 'required' is false, return -/// 'tensor_name' as empty-string if the control is not mapped to any -/// tensor. -Status GetBooleanSequenceControlProperties( - const inference::ModelSequenceBatching& batcher, - const std::string& model_name, - const inference::ModelSequenceBatching::Control::Kind control_kind, - const bool required, std::string* tensor_name, - inference::DataType* tensor_datatype, float* fp32_false_value, - float* fp32_true_value, int32_t* int32_false_value, - int32_t* int32_true_value, bool* bool_false_value, bool* bool_true_value); - -/// Get the tensor name and datatype for a non-boolean sequence -/// batcher control kind. If 'required' is true then must find a -/// tensor for the control. If 'required' is false, return -/// 'tensor_name' as empty-string if the control is not mapped to any -/// tensor. 'tensor_datatype' returns the required datatype for the -/// control. -Status GetTypedSequenceControlProperties( - const inference::ModelSequenceBatching& batcher, - const std::string& model_name, - const inference::ModelSequenceBatching::Control::Kind control_kind, - const bool required, std::string* tensor_name, - inference::DataType* tensor_datatype); - -/// Read a ModelConfig and normalize it as expected by model backends. -/// \param path The full-path to the directory containing the -/// model configuration. -/// \param min_compute_capability The minimum support CUDA compute -/// capability. -/// \param config Returns the normalized model configuration. -/// \return The error status. -Status GetNormalizedModelConfig( - const std::string& model_name, const std::string& path, - const double min_compute_capability, inference::ModelConfig* config); - -/// Auto-complete backend related fields (platform, backend and default model -/// filename) if not set, note that only Triton recognized backends will be -/// checked. -/// \param model_name The name of the model. -/// \param model_path The full-path to the directory containing the -/// model configuration. -/// \param config Returns the auto-completed model configuration. -/// \return The error status. -Status AutoCompleteBackendFields( - const std::string& model_name, const std::string& model_path, - inference::ModelConfig* config); - -/// Detects and adds missing fields in the model configuration. -/// \param min_compute_capability The minimum supported CUDA compute -/// capability. -/// \param config The model configuration -/// \return The error status -Status NormalizeModelConfig( - const double min_compute_capability, inference::ModelConfig* config); - -/// [FIXME] better formalize config normalization / validation -/// Detects and adds missing fields in instance group setting. -/// \param min_compute_capability The minimum supported CUDA compute -/// capability. -/// \param config The model configuration -/// \return The error status -Status NormalizeInstanceGroup( - const double min_compute_capability, - const std::vector& preferred_groups, - inference::ModelConfig* config); - -/// [FIXME] Remove once a more permanent solution is implemented (DLIS-4211) -/// Localize EXECUTION_ENV_PATH in python backend. -/// \param model_path The full-path to the directory containing the model -/// configuration, before localization. 
-/// \param config The model configuration -/// \param localized_model_dir The localized model directory -/// \return The error status -Status LocalizePythonBackendExecutionEnvironmentPath( - const std::string& model_path, inference::ModelConfig* config, - std::shared_ptr* localized_model_dir); - -/// Auto-complete the instance count based on instance kind and backend name. -/// \param group The instance group to set the count for. -/// \param backend The backend name to check against. -/// \return The error status. -Status SetDefaultInstanceCount( - inference::ModelInstanceGroup* group, const std::string& backend); - -/// Validate that a model is specified correctly, except for model inputs -/// and outputs. ValidateModelIOConfig() should be called to -/// validate model inputs and outputs. -/// \param config The model configuration to validate. -/// \param min_compute_capability The minimum support CUDA compute -/// capability. -/// \return The error status. A non-OK status indicates the configuration -/// is not valid. -Status ValidateModelConfig( - const inference::ModelConfig& config, const double min_compute_capability); - -/// [FIXME] better formalize config normalization / validation -/// Validate instance group setting. -/// \param config The model configuration to validate. -/// \param min_compute_capability The minimum support CUDA compute -/// capability. -/// \return The error status. A non-OK status indicates the configuration -/// is not valid. -Status ValidateInstanceGroup( - const inference::ModelConfig& config, const double min_compute_capability); - -/// Validate that a model inputs and outputs are specified correctly. -/// \param config The model configuration to validate. -/// \return The error status. A non-OK status indicates the configuration -/// is not valid. -Status ValidateModelIOConfig(const inference::ModelConfig& config); - -/// Validate that input is specified correctly in a model -/// configuration. -/// \param io The model input. -/// \param max_batch_size The max batch size specified in model configuration. -/// \param platform The platform name -/// \return The error status. A non-OK status indicates the input -/// is not valid. -Status ValidateModelInput( - const inference::ModelInput& io, int32_t max_batch_size, - const std::string& platform); - -/// Validate that an input matches one of the allowed input names. -/// \param io The model input. -/// \param allowed The set of allowed input names. -/// \return The error status. A non-OK status indicates the input -/// is not valid. -Status CheckAllowedModelInput( - const inference::ModelInput& io, const std::set& allowed); - -/// Validate that an output is specified correctly in a model -/// configuration. -/// \param io The model output. -/// \param max_batch_size The max batch size specified in model configuration. -/// \param platform The platform name -/// \return The error status. A non-OK status indicates the output -/// is not valid. -Status ValidateModelOutput( - const inference::ModelOutput& io, int32_t max_batch_size, - const std::string& platform); - -/// Validate that an output matches one of the allowed output names. -/// \param io The model output. -/// \param allowed The set of allowed output names. -/// \return The error status. A non-OK status indicates the output -/// is not valid. -Status CheckAllowedModelOutput( - const inference::ModelOutput& io, const std::set& allowed); - -/// Validate that a model batch inputs and batch outputs are specified -/// correctly. 
-/// \param config The model configuration to validate.. -/// \return The error status. A non-OK status indicates the batch inputs or -/// batch outputs are not valid. -Status ValidateBatchIO(const inference::ModelConfig& config); - -/// Parse the 'value' of the parameter 'key' into a boolean value. -/// \param key The name of the parameter. -/// \param value The value of the parameter in string. -/// \param parsed_value Return the boolean of the parameter. -/// \return The error status. A non-OK status indicates failure on parsing the -/// value. -Status ParseBoolParameter( - const std::string& key, std::string value, bool* parsed_value); - -/// Parse the 'value' of the parameter 'key' into a long long integer value. -/// \param key The name of the parameter. -/// \param value The value of the parameter in string. -/// \param parsed_value Return the numerical value of the parameter. -/// \return The error status. A non-OK status indicates failure on parsing the -/// value. -Status ParseLongLongParameter( - const std::string& key, const std::string& value, int64_t* parsed_value); - -/// Obtain the 'profile_index' of the 'profile_name'. -/// \param profile_name The name of the profile. -/// \param profile_index Return the index of the profile. -/// \return The error status. A non-OK status indicates failure on getting the -/// value. -Status GetProfileIndex(const std::string& profile_name, int* profile_index); - -/// Convert a model configuration protobuf to the equivalent json. -/// \param config The protobuf model configuration. -/// \param config_version The model configuration will be returned in -/// a format matching this version. If the configuration cannot be -/// represented in the requested version's format then an error will -/// be returned. -/// \param json Returns the equivalent JSON. -/// \return The error status. -Status ModelConfigToJson( - const inference::ModelConfig& config, const uint32_t config_version, - std::string* json_str); - -/// Convert a model configuration JSON to the equivalent protobuf. -/// \param config The JSON model configuration. -/// \param config_version The model configuration will be returned in -/// a format matching this version. If the configuration cannot be -/// represented in the requested version's format then an error will -/// be returned. -/// \param protobuf Returns the equivalent protobuf. -/// \return The error status. -Status JsonToModelConfig( - const std::string& json_config, const uint32_t config_version, - inference::ModelConfig* protobuf_config); - -/// Get the BackendType value for a platform name. -/// \param platform_name The platform name. -/// \return The BackendType or BackendType::UNKNOWN if the platform string -/// is not recognized. -BackendType GetBackendTypeFromPlatform(const std::string& platform_name); - -/// Get the BackendType value for a backend name. -/// \param backend_name The backend name. -/// \return The BackendType or BackendType::UNKNOWN if the platform string -/// is not recognized. -BackendType GetBackendType(const std::string& backend_name); - -/// Get the Triton server data type corresponding to a data type. -/// \param dtype The data type. -/// \return The Triton server data type. -TRITONSERVER_DataType DataTypeToTriton(const inference::DataType dtype); - -/// Get the data type corresponding to a Triton server data type. -/// \param dtype The Triton server data type. -/// \return The data type. 
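
Editor's note: `ParseBoolParameter` and `ParseLongLongParameter` declared above turn string-valued config parameters into typed values. A free-standing sketch follows; a plain `bool` return stands in for Triton's `Status`, and the `key` argument (used only for error messages) is omitted — both are simplifications, not the original signatures.

```cpp
// Free-standing sketch of the parameter parsers declared above; a bool return
// stands in for Triton's Status type (an assumption for brevity).
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <string>

bool ParseBoolParameterSketch(std::string value, bool* parsed) {
  std::transform(value.begin(), value.end(), value.begin(),
                 [](unsigned char c) { return std::tolower(c); });
  if (value == "true" || value == "1") { *parsed = true;  return true; }
  if (value == "false" || value == "0") { *parsed = false; return true; }
  return false;  // unrecognized value
}

bool ParseLongLongParameterSketch(const std::string& value, int64_t* parsed) {
  try {
    *parsed = std::stoll(value);
    return true;
  } catch (const std::exception&) {
    return false;  // not an integer
  }
}
```
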
-inference::DataType TritonToDataType(const TRITONSERVER_DataType dtype); - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_lifecycle.cc b/3rdparty/core-r22.12/src/model_lifecycle.cc deleted file mode 100644 index 2d37a422b439cdf46465210e577b4f3436480493..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_lifecycle.cc +++ /dev/null @@ -1,740 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// - -#include "model_lifecycle.h" - -#include -#include -#include -#include -#include -#include "constants.h" -#include "filesystem.h" -#include "model.h" -#include "model_config_utils.h" -#include "repo_agent.h" -#include "triton/common/logging.h" -#include "triton/common/thread_pool.h" - -#include "backend_model.h" -#ifdef TRITON_ENABLE_ENSEMBLE -#include "ensemble_model.h" -#endif // TRITON_ENABLE_ENSEMBLE - -namespace triton { namespace core { - -const std::string& -ModelReadyStateString(ModelReadyState state) -{ - switch (state) { - case ModelReadyState::UNKNOWN: { - static std::string m("UNKNOWN"); - return m; - } - case ModelReadyState::READY: { - static std::string m("READY"); - return m; - } - case ModelReadyState::UNAVAILABLE: { - static std::string m("UNAVAILABLE"); - return m; - } - case ModelReadyState::LOADING: { - static std::string m("LOADING"); - return m; - } - case ModelReadyState::UNLOADING: { - static std::string m("UNLOADING"); - return m; - } - } - - static std::string m(""); - return m; -} - -namespace { - -Status -VersionsToLoad( - const std::string model_path, const std::string& name, - const inference::ModelConfig& model_config, std::set* versions) -{ - versions->clear(); - - // Get integral number of the version directory - std::set subdirs; - RETURN_IF_ERROR(GetDirectorySubdirs(model_path, &subdirs)); - std::set> existing_versions; - for (const auto& subdir : subdirs) { - if (subdir == kWarmupDataFolder || subdir == kInitialStateFolder) { - continue; - } - if ((subdir.length() > 1) && (subdir.front() == '0')) { - LOG_WARNING << "ignore version directory '" << subdir - << "' which contains leading zeros in its directory name"; - continue; - } - try { - int64_t version = std::stoll(subdir); - existing_versions.insert(version); - } - catch (const std::invalid_argument& ia) { - LOG_WARNING << "ignore version directory '" << subdir - << "' which fails to convert to integral number"; - } - } - - if (model_config.version_policy().has_specific()) { - for (const auto& v : model_config.version_policy().specific().versions()) { - // Only load the specific versions that are presented in model directory - bool version_not_exist = existing_versions.insert(v).second; - if (!version_not_exist) { - versions->emplace(v); - } else { - LOG_ERROR << "version " << v << " is specified for model '" << name - << "', but the version directory is not present"; - } - } - } else { - if (model_config.version_policy().has_latest()) { - // std::set is sorted with std::greater - for (const auto& v : existing_versions) { - if (versions->size() >= - model_config.version_policy().latest().num_versions()) { - break; - } - versions->emplace(v); - } - } else { - // all - versions->insert(existing_versions.begin(), existing_versions.end()); - } - } - - return Status::Success; -} - -// Use smart pointer with custom deleter so that model state will be updated -// to UNAVAILABLE if all smart pointer copies are out of scope -struct ModelDeleter { - ModelDeleter(std::function OnDestroyModel) - : OnDestroyModel_(std::move(OnDestroyModel)) - { - } - - void operator()(Model* model) - { - // The actual model object must be destroyed in a different - // thread. This thread could have a callstack that includes the - // model itself because this deleter could be triggered by - // a request release or response send in the model. Following - // delete will lead to the model destructor which may wait on this - // same thread... so deadlock if we don't use a different thread - // here. 
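
Editor's note: the ModelDeleter comment just above explains why the model object must be destroyed on a different thread — the last reference can be dropped from inside one of the model's own callbacks, and deleting on that stack would deadlock. A minimal sketch of the idiom; `Widget` and `MakeManaged` are illustrative names, not Triton types.

```cpp
// Minimal sketch of the "delete on another thread" deleter idiom described in
// the comment above; Widget and MakeManaged are illustrative names only.
#include <functional>
#include <memory>
#include <thread>

struct Widget {};  // stands in for Model

std::shared_ptr<Widget> MakeManaged(Widget* raw, std::function<void()> on_destroyed) {
  return std::shared_ptr<Widget>(raw, [on_destroyed](Widget* w) {
    // Never delete on the caller's stack: the last reference may be released
    // from inside one of the object's own callbacks.
    std::thread([w, on_destroyed]() {
      delete w;
      on_destroyed();  // e.g. mark the lifecycle entry UNAVAILABLE afterwards
    }).detach();
  });
}
```
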
- std::function destroy_fn = OnDestroyModel_; - std::thread dthd([model, destroy_fn]() { - delete model; - destroy_fn(); - }); - - dthd.detach(); - } - - // Use to inform the ModelLifeCycle that the model handle is destroyed - std::function OnDestroyModel_; -}; - -} // namespace - -Status -ModelLifeCycle::Create( - InferenceServer* server, const ModelLifeCycleOptions& options, - std::unique_ptr* life_cycle) -{ - std::unique_ptr local_life_cycle( - new ModelLifeCycle(server, options)); - - *life_cycle = std::move(local_life_cycle); - return Status::Success; -} - -const ModelStateMap -ModelLifeCycle::LiveModelStates(bool strict_readiness) -{ - LOG_VERBOSE(2) << "LiveModelStates()"; - std::lock_guard map_lock(map_mtx_); - ModelStateMap live_model_states; - for (auto& model_version : map_) { - bool live = false; - VersionStateMap version_map; - - for (auto& version_model : model_version.second) { - std::lock_guard lock(version_model.second->mtx_); - if (strict_readiness && - version_model.second->state_ != ModelReadyState::READY) { - continue; - } - - // At least one version is live (ready / loading / unloading) - if ((version_model.second->state_ != ModelReadyState::UNKNOWN) && - (version_model.second->state_ != ModelReadyState::UNAVAILABLE)) { - live = true; - version_map[version_model.first] = std::make_pair( - version_model.second->state_, version_model.second->state_reason_); - } - } - - if (live) { - live_model_states[model_version.first] = std::move(version_map); - } - } - return live_model_states; -} - -Status -ModelLifeCycle::StopAllModels() -{ - LOG_VERBOSE(2) << "StopAllModels()"; - std::lock_guard map_lock(map_mtx_); - for (auto& model_version : map_) { - for (auto& version_model : model_version.second) { - if (version_model.second != nullptr) { - std::lock_guard lock(version_model.second->mtx_); - if (version_model.second->model_ != nullptr) { - version_model.second->model_->Stop(); - } - } - } - } - return Status::Success; -} - -const std::set> -ModelLifeCycle::InflightStatus() -{ - LOG_VERBOSE(2) << "InflightStatus()"; - std::lock_guard map_lock(map_mtx_); - std::set> inflight_status; - for (auto& model_version : map_) { - for (auto& version_model : model_version.second) { - if (version_model.second != nullptr) { - std::lock_guard lock(version_model.second->mtx_); - if (version_model.second->model_ != nullptr) { - const auto cnt = - version_model.second->model_->InflightInferenceCount(); - if (cnt != 0) { - inflight_status.emplace( - model_version.first, version_model.first, cnt); - } - } - } - } - } - return inflight_status; -} - -const ModelStateMap -ModelLifeCycle::ModelStates() -{ - LOG_VERBOSE(2) << "ModelStates()"; - std::lock_guard map_lock(map_mtx_); - ModelStateMap model_states; - for (auto& model_version : map_) { - VersionStateMap version_map; - - for (auto& version_model : model_version.second) { - std::lock_guard lock(version_model.second->mtx_); - version_map[version_model.first] = std::make_pair( - version_model.second->state_, version_model.second->state_reason_); - } - - model_states[model_version.first] = std::move(version_map); - } - - return model_states; -} - -const VersionStateMap -ModelLifeCycle::VersionStates(const std::string& model_name) -{ - LOG_VERBOSE(2) << "VersionStates() '" << model_name << "'"; - std::lock_guard map_lock(map_mtx_); - VersionStateMap version_map; - auto mit = map_.find(model_name); - if (mit != map_.end()) { - for (auto& version_model : mit->second) { - std::lock_guard lock(version_model.second->mtx_); - 
version_map[version_model.first] = std::make_pair( - version_model.second->state_, version_model.second->state_reason_); - } - } - - return version_map; -} - -Status -ModelLifeCycle::ModelState( - const std::string& model_name, const int64_t model_version, - ModelReadyState* state) -{ - std::lock_guard map_lock(map_mtx_); - auto mit = map_.find(model_name); - if (mit != map_.end()) { - auto vit = mit->second.find(model_version); - if (vit != mit->second.end()) { - std::lock_guard lock(vit->second->mtx_); - *state = vit->second->state_; - return Status::Success; - } - } - - return Status( - Status::Code::NOT_FOUND, "model '" + model_name + "', version " + - std::to_string(model_version) + - " is not found"); -} - -Status -ModelLifeCycle::GetModel( - const std::string& model_name, const int64_t version, - std::shared_ptr* model) -{ - LOG_VERBOSE(2) << "GetModel() '" << model_name << "' version " << version; - std::lock_guard map_lock(map_mtx_); - auto mit = map_.find(model_name); - if (mit == map_.end()) { - return Status(Status::Code::NOT_FOUND, "'" + model_name + "' is not found"); - } - - auto vit = mit->second.find(version); - if (vit == mit->second.end()) { - if (version != -1) { - return Status( - Status::Code::NOT_FOUND, "'" + model_name + "' version " + - std::to_string(version) + - " is not found"); - } - - // The case where the request is asking for latest version - int64_t latest = -1; - for (auto& version_model : mit->second) { - if (version_model.first > latest) { - std::lock_guard lock(version_model.second->mtx_); - if (version_model.second->state_ == ModelReadyState::READY) { - latest = version_model.first; - // Tedious, but have to set handle for any "latest" version - // at the moment to avoid edge case like the following: - // "versions : 1 3 2", version 3 is latest but is requested - // to be unloaded when the iterator is examining version 2, - // then 'model' will ensure version 3 is still valid - *model = version_model.second->model_; - } - } - } - if (latest == -1) { - return Status( - Status::Code::NOT_FOUND, - "'" + model_name + "' has no available versions"); - } - } else { - std::lock_guard lock(vit->second->mtx_); - if (vit->second->state_ == ModelReadyState::READY) { - *model = vit->second->model_; - } else { - return Status( - Status::Code::UNAVAILABLE, "'" + model_name + "' version " + - std::to_string(version) + - " is not at ready state"); - } - } - return Status::Success; -} - -Status -ModelLifeCycle::AsyncUnload(const std::string& model_name) -{ - LOG_VERBOSE(2) << "AsyncUnload() '" << model_name << "'"; - std::lock_guard map_lock(map_mtx_); - auto it = map_.find(model_name); - if (it == map_.end()) { - return Status( - Status::Code::INVALID_ARG, "Model to be unloaded has not been served"); - } - - // Get the existing agent models and notify the unload action - const uint64_t now_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - for (auto& version : it->second) { - auto& model_info = version.second; - std::lock_guard lock(model_info->mtx_); - model_info->last_update_ns_ = now_ns; - // Unload serving model, for model that is in LOADING state, - // the updated timestamp will be recognized that there is newer update - // on the model info and the load should be aborted - if (model_info->state_ == ModelReadyState::READY) { - if (model_info->agent_model_list_ != nullptr) { - // Only log the error because the model should be unloaded regardless - auto status = model_info->agent_model_list_->InvokeAgentModels( - 
TRITONREPOAGENT_ACTION_UNLOAD); - if (!status.IsOk()) { - LOG_ERROR - << "Agent model returns error on TRITONREPOAGENT_ACTION_UNLOAD: " - << status.AsString(); - } - } - - // unload - model_info->Release(); - } - } - - return Status::Success; -} - -Status -ModelLifeCycle::AsyncLoad( - const std::string& model_name, const std::string& model_path, - const inference::ModelConfig& model_config, const bool is_config_provided, - const std::shared_ptr& agent_model_list, - std::function&& OnComplete) -{ - LOG_VERBOSE(2) << "AsyncLoad() '" << model_name << "'"; - - std::lock_guard map_lock(map_mtx_); - auto it = map_.find(model_name); - if (it == map_.end()) { - it = map_.emplace(std::make_pair(model_name, VersionMap())).first; - } - - std::set versions; - RETURN_IF_ERROR( - VersionsToLoad(model_path, model_name, model_config, &versions)); - if (versions.empty()) { - return Status( - Status::Code::INVALID_ARG, - "at least one version must be available under the version policy of " - "model '" + - model_name + "'"); - } - - - const uint64_t now_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - std::shared_ptr load_tracker( - new LoadTracker(versions.size(), now_ns)); - for (const auto& version : versions) { - std::unique_ptr linfo( - new ModelInfo(model_path, model_config, now_ns)); - ModelInfo* model_info = linfo.get(); - - LOG_INFO << "loading: " << model_name << ":" << version; - model_info->state_ = ModelReadyState::LOADING; - model_info->state_reason_.clear(); - model_info->agent_model_list_ = agent_model_list; - - auto res = it->second.emplace( - std::make_pair(version, std::unique_ptr())); - if (res.second) { - res.first->second = std::move(linfo); - } else { - // There is already a record of this model version. Check if the version - // model is being served, if so, the re-load of the version - // should be performed in background to avoid version downtime. - // Otherwise, swap and monitor state for newly loading model. - auto& serving_model = res.first->second; - std::lock_guard lock(serving_model->mtx_); - if (serving_model->state_ == ModelReadyState::READY) { - background_models_[(uintptr_t)model_info] = std::move(linfo); - } else { - // swap the monitoring model info - serving_model.swap(linfo); - - // further check the state, put to 'background_models_' to keep - // the object valid if the model is LOADING / UNLOADING, because - // the model info will be accessed by a different thread once the - // operation is completed - if ((linfo->state_ == ModelReadyState::LOADING) || - (linfo->state_ == ModelReadyState::UNLOADING)) { - ModelInfo* key = linfo.get(); - background_models_[(uintptr_t)key] = std::move(linfo); - } - } - } - - // Load model asynchronously via thread pool - load_pool_->Enqueue([this, model_name, version, model_info, OnComplete, - load_tracker, is_config_provided]() { - CreateModel(model_name, version, model_info, is_config_provided); - OnLoadComplete(model_name, version, model_info, OnComplete, load_tracker); - }); - } - - return Status::Success; -} - -void -ModelLifeCycle::CreateModel( - const std::string& model_name, const int64_t version, ModelInfo* model_info, - const bool is_config_provided) -{ - LOG_VERBOSE(2) << "CreateModel() '" << model_name << "' version " << version; - const auto& model_config = model_info->model_config_; - - // Create model - Status status; - std::unique_ptr is; - - // If 'backend' is specified in the config then use the new triton - // backend. 
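
Editor's note: AsyncLoad() above keeps a newly loading version in `background_models_` while the currently serving version keeps handling requests, and only swaps once the load succeeds. The sketch below reduces that idea to a single mutex-guarded slot; `Slot` and `Payload` are illustrative names, not Triton types.

```cpp
// Reduced sketch of "load in the background, swap when ready"; Slot and
// Payload are illustrative names, not Triton types.
#include <memory>
#include <mutex>
#include <thread>

struct Payload { int version; };

struct Slot {
  std::mutex mtx;
  std::shared_ptr<Payload> serving;  // what in-flight requests hold a copy of

  void AsyncReload(int new_version) {
    std::thread([this, new_version]() {
      auto fresh = std::make_shared<Payload>(Payload{new_version});  // slow load here
      std::lock_guard<std::mutex> lk(mtx);
      serving = std::move(fresh);  // old payload lives until its last user releases it
    }).detach();
  }

  std::shared_ptr<Payload> Get() {
    std::lock_guard<std::mutex> lk(mtx);
    return serving;
  }
};
```
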
- if (!model_config.backend().empty()) { - std::unique_ptr model; - status = TritonModel::Create( - server_, model_info->model_path_, cmdline_config_map_, host_policy_map_, - model_name, version, model_config, is_config_provided, &model); - is.reset(model.release()); - } else { -#ifdef TRITON_ENABLE_ENSEMBLE - if (model_info->is_ensemble_) { - status = EnsembleModel::Create( - server_, model_info->model_path_, version, model_config, - is_config_provided, min_compute_capability_, &is); - // Complete label provider with label information from involved models - // Must be done here because involved models may not be able to - // obtained from server because this may happen during server - // initialization. - if (status.IsOk()) { - std::set no_label_outputs; - const auto& label_provider = is->GetLabelProvider(); - for (const auto& output : model_config.output()) { - if (label_provider->GetLabel(output.name(), 0).empty()) { - no_label_outputs.emplace(output.name()); - } - } - for (const auto& element : model_config.ensemble_scheduling().step()) { - for (const auto& pair : element.output_map()) { - // Found model that produce one of the missing output - if (no_label_outputs.find(pair.second) != no_label_outputs.end()) { - std::shared_ptr model; - // Safe to obtain model because the ensemble can't be loaded - // until the involved models are ready - GetModel(element.model_name(), element.model_version(), &model); - label_provider->AddLabels( - pair.second, - model->GetLabelProvider()->GetLabels(pair.first)); - } - } - } - } - } else -#endif // TRITON_ENABLE_ENSEMBLE - { - status = Status( - Status::Code::INVALID_ARG, - "unknown platform '" + model_config.platform() + "'"); - } - } - - std::lock_guard lock(model_info->mtx_); - if (status.IsOk()) { - // [FIXME] better way to manage agent model lifecycle - // Let the deleter also holds a shared pointer copy of agent model list, - // because the reference in ModelInfo can be cleared before the Model object - // is destroyed, and we want agent model to be valid for receiving - // UNLOAD_COMPLETE signal (see ~TritonRepoAgentModelList for detail) - auto agent_model_list = model_info->agent_model_list_; - model_info->model_.reset( - is.release(), ModelDeleter([this, model_name, version, model_info, - agent_model_list]() mutable { - LOG_VERBOSE(2) << "OnDestroy callback() '" << model_name - << "' version " << version; - LOG_INFO << "successfully unloaded '" << model_name << "' version " - << version; - // Update model state as it is fully unloaded - { - std::lock_guard lock(model_info->mtx_); - model_info->state_ = ModelReadyState::UNAVAILABLE; - model_info->state_reason_ = "unloaded"; - } - - // Check if the model info is in background, if so, remove from the - // map - std::lock_guard lk(this->map_mtx_); - auto it = this->background_models_.find((uintptr_t)model_info); - if (it != this->background_models_.end()) { - this->background_models_.erase(it); - } - })); - } else { - LOG_ERROR << "failed to load '" << model_name << "' version " << version - << ": " << status.AsString(); - model_info->state_ = ModelReadyState::UNAVAILABLE; - model_info->state_reason_ = status.AsString(); - } -} - -void -ModelLifeCycle::OnLoadComplete( - const std::string& model_name, const int64_t version, ModelInfo* model_info, - std::function OnComplete, - std::shared_ptr load_tracker) -{ - std::lock_guard tracker_lock(load_tracker->mtx_); - ++load_tracker->completed_version_cnt_; - load_tracker->load_set_[version] = model_info; - // Version will not be marked ready until all 
versions are - // ready, this simplify the unloading when one version fails to load as - // all other versions won't have inflight requests - if (model_info->state_ != ModelReadyState::LOADING) { - load_tracker->load_failed_ = true; - load_tracker->reason_ += - ("version " + std::to_string(version) + " is at " + - ModelReadyStateString(model_info->state_) + - " state: " + model_info->state_reason_ + ";"); - } - // Check if all versions are completed and finish the load - if (load_tracker->completed_version_cnt_ == - load_tracker->affected_version_cnt_) { - // hold 'map_mtx_' as there will be change onto the model info map - std::lock_guard map_lock(map_mtx_); - auto it = map_.find(model_name); - // Check if the load is the latest frontground action on the model - for (const auto& version_info : it->second) { - if (version_info.second->last_update_ns_ > - load_tracker->last_update_ns_) { - load_tracker->load_failed_ = true; - load_tracker->reason_ = - "Newer operation has been applied to the model lifecycle, current " - "load operation is out-dated."; - break; - } - } - - if (load_tracker->load_failed_) { - // Move agent list out of ModelInfo as it needs to be invoked - // after all ModelInfos are reset - std::shared_ptr lagent_list; - if (model_info->agent_model_list_) { - lagent_list = std::move(model_info->agent_model_list_); - } - // If any of the versions fails to load, abort the load and unload - // all newly loaded versions - for (auto& loaded : load_tracker->load_set_) { - // Unload directly, the object is being managed either in frontground - // or background - std::lock_guard lock(loaded.second->mtx_); - if (loaded.second->model_ != nullptr) { - loaded.second->Release(); - } - } - - if (lagent_list) { - auto status = - lagent_list->InvokeAgentModels(TRITONREPOAGENT_ACTION_LOAD_FAIL); - if (!status.IsOk()) { - LOG_ERROR << "Agent model returns error on " - "TRITONREPOAGENT_ACTION_LOAD_FAIL: " - << status.AsString(); - } - } - } else { - // Unload any previous loaded versions that are still available - for (auto& version_info : it->second) { - auto& mi = version_info.second; - std::lock_guard info_lk(mi->mtx_); - if ((mi->state_ == ModelReadyState::READY) && - (mi->last_update_ns_ < load_tracker->last_update_ns_)) { - if (mi->agent_model_list_ != nullptr) { - auto status = mi->agent_model_list_->InvokeAgentModels( - TRITONREPOAGENT_ACTION_UNLOAD); - if (!status.IsOk()) { - LOG_ERROR << "Agent model returns error on " - "TRITONREPOAGENT_ACTION_UNLOAD: " - << status.AsString(); - } - } - - mi->Release(); - } - } - - // Mark current versions ready and track info in foreground - for (auto& loaded : load_tracker->load_set_) { - std::lock_guard curr_info_lk(loaded.second->mtx_); - loaded.second->state_ = ModelReadyState::READY; - model_info->state_reason_.clear(); - LOG_INFO << "successfully loaded '" << model_name << "' version " - << version; - - auto bit = background_models_.find((uintptr_t)loaded.second); - // Check if the version model is loaded in background, if so, - // replace and unload the current serving version - if (bit != background_models_.end()) { - auto vit = it->second.find(loaded.first); - - // Need to lock the previous model info for in case the model is - // loading / unloading, this ensure the model state is consistent - // even when the load / unload is completed. 
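
Editor's note: OnLoadComplete() runs once per version and only finalizes the overall load after the last version has reported in, which is why it counts completions against the affected-version total under a shared mutex. A stripped-down fan-in sketch of that pattern; `Tracker` is an illustrative name, not the LoadTracker defined in the header below.

```cpp
// Stripped-down sketch of the per-load fan-in: each version reports its
// outcome, and the last reporter finalizes the whole load exactly once.
#include <cstddef>
#include <functional>
#include <mutex>
#include <string>

struct Tracker {
  std::mutex mtx;
  size_t expected;
  size_t completed = 0;
  bool failed = false;
  std::string reason;

  explicit Tracker(size_t n) : expected(n) {}

  void Report(bool ok, const std::string& why,
              const std::function<void(bool, const std::string&)>& on_done) {
    std::lock_guard<std::mutex> lk(mtx);
    if (!ok) { failed = true; reason += why + ";"; }
    if (++completed == expected) {
      on_done(!failed, reason);  // runs on the last version to complete
    }
  }
};
```
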
- std::lock_guard prev_info_lk(vit->second->mtx_); - - // swap previous info into local unique pointer - auto linfo = std::move(bit->second); - vit->second.swap(linfo); - background_models_.erase(bit); - - // if previous info is under change, put into 'background_models_' - if ((linfo->state_ == ModelReadyState::LOADING) || - (linfo->state_ == ModelReadyState::UNLOADING)) { - ModelInfo* key = linfo.get(); - background_models_[(uintptr_t)key] = std::move(linfo); - } - } - } - if (model_info->agent_model_list_) { - auto status = model_info->agent_model_list_->InvokeAgentModels( - TRITONREPOAGENT_ACTION_LOAD_COMPLETE); - if (!status.IsOk()) { - LOG_ERROR << "Agent model returns error on " - "TRITONREPOAGENT_ACTION_LOAD_COMPLETE: " - << status.AsString(); - } - } - } - if (OnComplete != nullptr) { - OnComplete( - load_tracker->load_failed_ - ? Status(Status::Code::INVALID_ARG, load_tracker->reason_) - : Status::Success); - } - } -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_lifecycle.h b/3rdparty/core-r22.12/src/model_lifecycle.h deleted file mode 100644 index e32b607219bbc155ab1332bb1ffaa0ed2971316d..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_lifecycle.h +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// -#pragma once - -#include -#include -#include -#include "infer_parameter.h" -#include "model_config.pb.h" -#include "repo_agent.h" -#include "status.h" -#include "triton/common/model_config.h" -#include "triton/common/thread_pool.h" - -namespace triton { namespace core { - -struct ModelLifeCycleOptions { - explicit ModelLifeCycleOptions( - const double min_compute_capability, - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map, - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map, - const unsigned int model_load_thread_count) - : min_compute_capability_(min_compute_capability), - backend_cmdline_config_map_(backend_cmdline_config_map), - host_policy_map_(host_policy_map), - model_load_thread_count_(model_load_thread_count) - { - } - // The minimum supported CUDA compute capability. - const double min_compute_capability_; - // The backend configuration settings specified on the command-line - const triton::common::BackendCmdlineConfigMap& backend_cmdline_config_map_; - // The host policy setting used when loading models. - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map_; - // Number of the threads to use for concurrently loading models - const unsigned int model_load_thread_count_; -}; - - -/// Readiness status for models. -enum class ModelReadyState { - // The model is in an unknown state. The model is not available for - // inferencing. - UNKNOWN, - - // The model is ready and available for inferencing. - READY, - - // The model is unavailable, indicating that the model failed to - // load or has been implicitly or explicitly unloaded. The model is - // not available for inferencing. - UNAVAILABLE, - - // The model is being loaded by the inference server. The model is - // not available for inferencing. - LOADING, - - // The model is being unloaded by the inference server. The model is - // not available for inferencing. - UNLOADING -}; - -/// Get the string representation for a ModelReadyState -const std::string& ModelReadyStateString(ModelReadyState state); - -using VersionStateMap = - std::map>; -using ModelStateMap = std::map; - -// Helper class to manage the lifecycle of a list of associated agent models -class TritonRepoAgentModelList { - public: - TritonRepoAgentModelList() - : last_action_type_(TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE){}; - ~TritonRepoAgentModelList() - { - // Using destructor to finish the unload lifecycle without - // explicitly managing the last step in ModelLifecycle. - if (last_action_type_ == TRITONREPOAGENT_ACTION_UNLOAD) { - InvokeAgentModels(TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE); - } - } - Status AddAgentModel(std::unique_ptr&& agent_model) - { - agent_models_.emplace_back(std::move(agent_model)); - return Status::Success; - } - - size_t Size() { return agent_models_.size(); } - - TritonRepoAgentModel* Back() { return agent_models_.back().get(); } - - Status InvokeAgentModels(const TRITONREPOAGENT_ActionType action_type) - { - // Special handling for the current model lifecycle implementation, - // the repo agent may be asked to perform UNLOAD action multiple times, - // and the requests after the first should be ignored. 
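
Editor's note: InvokeAgentModels() (continuing below) dispatches LOAD/UNLOAD to the agent models front-to-back and the completion/failure actions back-to-front, and the comment above says a repeated UNLOAD should be ignored. The condensed sketch below shows that convention as the comment describes it; `Action` and `AgentChain` are illustrative names, and the early-return guard in the deleted code itself reads slightly differently.

```cpp
// Condensed sketch of the agent invocation convention: forward order for
// LOAD/UNLOAD, reverse order for completion/failure actions, repeated
// UNLOADs ignored. Action and AgentChain are illustrative names.
#include <functional>
#include <vector>

enum class Action { LOAD, UNLOAD, LOAD_COMPLETE, LOAD_FAIL, UNLOAD_COMPLETE };

struct AgentChain {
  std::vector<std::function<void(Action)>> agents;
  Action last = Action::UNLOAD_COMPLETE;

  void Invoke(Action a) {
    if (a == Action::UNLOAD && last == Action::UNLOAD) {
      return;  // the agents already saw this unload
    }
    last = a;
    if (a == Action::LOAD || a == Action::UNLOAD) {
      for (auto& agent : agents) agent(a);  // forward order
    } else {
      for (auto it = agents.rbegin(); it != agents.rend(); ++it) (*it)(a);  // reverse order
    }
  }
};
```
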
- const bool first_unload = - (action_type == TRITONREPOAGENT_ACTION_UNLOAD) && - (last_action_type_ != TRITONREPOAGENT_ACTION_UNLOAD); - if (!first_unload) { - return Status::Success; - } - - last_action_type_ = action_type; - switch (action_type) { - case TRITONREPOAGENT_ACTION_LOAD: - case TRITONREPOAGENT_ACTION_UNLOAD: { - for (size_t idx = 0; idx < agent_models_.size(); ++idx) { - RETURN_IF_ERROR(agent_models_[idx]->InvokeAgent(action_type)); - } - break; - } - case TRITONREPOAGENT_ACTION_LOAD_COMPLETE: - case TRITONREPOAGENT_ACTION_LOAD_FAIL: - case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: { - // reverse order - for (size_t one_pass_idx = agent_models_.size(); one_pass_idx > 0; - --one_pass_idx) { - RETURN_IF_ERROR( - agent_models_[one_pass_idx - 1]->InvokeAgent(action_type)); - } - break; - } - } - return Status::Success; - } - - private: - DISALLOW_COPY_AND_ASSIGN(TritonRepoAgentModelList); - - std::vector> agent_models_; - TRITONREPOAGENT_ActionType last_action_type_; -}; - -class InferenceServer; -class Model; - -class ModelLifeCycle { - public: - static Status Create( - InferenceServer* server, const ModelLifeCycleOptions& options, - std::unique_ptr* life_cycle); - - ~ModelLifeCycle() - { - // Explicitly clean up thread pool first to clean up any pending callbacks - // that may modify model lifecycle members - load_pool_.reset(); - map_.clear(); - } - - // Start loading model with specified versions asynchronously. - // All versions that are being served will be unloaded only after - // the load is finished sucessfully. - Status AsyncLoad( - const std::string& model_name, const std::string& model_path, - const inference::ModelConfig& model_config, const bool is_config_provided, - const std::shared_ptr& agent_model_list, - std::function&& OnComplete); - - // Unload model asynchronously. - Status AsyncUnload(const std::string& model_name); - - // Get specified version of the model. Latest ready version will - // be retrieved if 'version' is -1. Return error if the version specified is - // not found or it is not ready. - Status GetModel( - const std::string& model_name, const int64_t version, - std::shared_ptr* model); - - // Get the ModelStateMap representation of the live models. A model is - // live if at least one of the versions is not unknown nor unavailable. - // If 'strict_readiness' is true, a model is only live if - // at least one of the versions is ready. - const ModelStateMap LiveModelStates(bool strict_readiness = false); - - // Get the ModelStateMap representation of the models. - const ModelStateMap ModelStates(); - - // Get the VersionStateMap representation of the specified model. - const VersionStateMap VersionStates(const std::string& model_name); - - // Get the state of a specific model version. - Status ModelState( - const std::string& model_name, const int64_t model_version, - ModelReadyState* state); - - // Instruct the model to stop accepting new inference requests. - Status StopAllModels(); - - // Return the number of in-flight inference if any, model versions - // that don't have in-flight inferences will not be included. 
- const std::set> InflightStatus(); - - private: - struct ModelInfo { - ModelInfo( - const std::string& model_path, - const inference::ModelConfig& model_config, - const uint64_t last_update_ns) - : model_config_(model_config), model_path_(model_path), -#ifdef TRITON_ENABLE_ENSEMBLE - is_ensemble_(model_config.platform() == kEnsemblePlatform), -#else - is_ensemble_(false), -#endif // TRITON_ENABLE_ENSEMBLE - last_update_ns_(last_update_ns), state_(ModelReadyState::UNKNOWN) - { - } - - // Release the flyweight in ModelInfo object, reflect as 'UNLOADING' in - // model state. Note that 'mtx_' should be acquired before invoking this - // function to prevent possible data race. - void Release() - { - state_ = ModelReadyState::UNLOADING; - state_reason_.clear(); - agent_model_list_.reset(); - model_.reset(); - } - - const inference::ModelConfig model_config_; - const std::string model_path_; - const bool is_ensemble_; - - std::mutex mtx_; - - uint64_t last_update_ns_; - - ModelReadyState state_; - std::string state_reason_; - - // flyweight - std::shared_ptr agent_model_list_; - std::shared_ptr model_; - }; - - struct LoadTracker { - LoadTracker( - const size_t affected_version_cnt, const uint64_t last_update_ns) - : last_update_ns_(last_update_ns), - affected_version_cnt_(affected_version_cnt), load_failed_(false), - completed_version_cnt_(0) - { - } - - const uint64_t last_update_ns_; - const size_t affected_version_cnt_; - - std::mutex mtx_; - - bool load_failed_; - std::string reason_; - size_t completed_version_cnt_; - std::map load_set_; - }; - - ModelLifeCycle(InferenceServer* server, const ModelLifeCycleOptions& options) - : server_(server), - min_compute_capability_(options.min_compute_capability_), - cmdline_config_map_(options.backend_cmdline_config_map_), - host_policy_map_(options.host_policy_map_) - { - load_pool_.reset(new triton::common::ThreadPool( - std::max(1u, options.model_load_thread_count_))); - } - - void CreateModel( - const std::string& model_name, const int64_t version, - ModelInfo* model_info, const bool is_config_provided); - // Callback function template for model load. - // 'OnComplete' needs to be passed by value for now as there can be - // multiple versions to be loaded and each holds a copy of - // the 'OnComplete' callback. - void OnLoadComplete( - const std::string& model_name, const int64_t version, - ModelInfo* model_info, std::function OnComplete, - std::shared_ptr load_tracker); - - - // Mutex for 'map_' and 'background_models_' - std::mutex map_mtx_; - - using VersionMap = std::map>; - using ModelMap = std::map; - ModelMap map_; - // Models that are being loaded / unloaded in background - std::map> background_models_; - - InferenceServer* server_; - const double min_compute_capability_; - const triton::common::BackendCmdlineConfigMap cmdline_config_map_; - const triton::common::HostPolicyCmdlineConfigMap host_policy_map_; - - // Fixed-size thread pool to load models at specified concurrency - std::unique_ptr load_pool_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_repository_manager.cc b/3rdparty/core-r22.12/src/model_repository_manager.cc deleted file mode 100644 index 7a8f2b5ca3d67b781a5551e72157e18fadaf8380..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_repository_manager.cc +++ /dev/null @@ -1,1602 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// - -#include "model_repository_manager.h" - -#include -#include -#include -#include -#include -#include "constants.h" -#include "ensemble_utils.h" -#include "filesystem.h" -#include "model.h" -#include "model_config_utils.h" -#include "triton/common/logging.h" - -#include "backend_model.h" -#ifdef TRITON_ENABLE_ENSEMBLE -#include "ensemble_model.h" -#endif // TRITON_ENABLE_ENSEMBLE - -namespace triton { namespace core { - -namespace { - -static std::string file_prefix = "file:"; - -// Internal repo agent used for model file override -class LocalizeRepoAgent : public TritonRepoAgent { - public: - LocalizeRepoAgent() - : TritonRepoAgent("ModelRepositoryManager::LocalizeRepoAgent") - { - // Callbacks below interact with TritonRepoAgentModel directly knowing that - // it is the internal implementation of TRITONREPOAGENT_AgentModel - model_action_fn_ = [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) - -> TRITONSERVER_Error* { - auto agent_model = reinterpret_cast(model); - switch (action_type) { - case TRITONREPOAGENT_ACTION_LOAD: { - // localize the override files for model loading, - // as currently the model is expected to load from local directory - const char* temp_dir_cstr = nullptr; - RETURN_TRITONSERVER_ERROR_IF_ERROR( - agent_model->AcquireMutableLocation( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, &temp_dir_cstr)); - const std::string temp_dir = temp_dir_cstr; - const auto& files = - *reinterpret_cast*>( - agent_model->State()); - bool found_config = false; - for (const auto& file : files) { - if (file->Name() == "config") { - if (file->Type() != TRITONSERVER_PARAMETER_STRING) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "Config parameter 'config' must have string type for its " - "value"); - } - inference::ModelConfig config; - RETURN_TRITONSERVER_ERROR_IF_ERROR(JsonToModelConfig( - file->ValueString(), 1 /* config_version */, &config)); - RETURN_TRITONSERVER_ERROR_IF_ERROR(WriteTextProto( - JoinPath({temp_dir, 
kModelConfigPbTxt}), config)); - found_config = true; - } else if (file->Name().rfind(file_prefix, 0) == 0) { - if (file->Type() != TRITONSERVER_PARAMETER_BYTES) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("File parameter '") + file->Name() + - "' must have bytes type for its value") - .c_str()); - } - - // Save model file to the instructed directory - // mkdir - const std::string file_path = - JoinPath({temp_dir, file->Name().substr(file_prefix.size())}); - const std::string dir = DirName(file_path); - bool dir_exist = false; - RETURN_TRITONSERVER_ERROR_IF_ERROR(FileExists(dir, &dir_exist)); - if (dir_exist) { - bool is_dir = false; - RETURN_TRITONSERVER_ERROR_IF_ERROR(IsDirectory(dir, &is_dir)); - if (!is_dir) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("Invalid file parameter '") + file->Name() + - "', directory has been created as a file") - .c_str()); - } - } else { - RETURN_TRITONSERVER_ERROR_IF_ERROR( - MakeDirectory(dir, true /* recursive */)); - } - - // write - RETURN_TRITONSERVER_ERROR_IF_ERROR(WriteBinaryFile( - file_path, - reinterpret_cast(file->ValuePointer()), - file->ValueByteSize())); - } - } - if (!found_config) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "Load parameter 'config' must be specified for model file " - "override"); - } - // Commit the temporary directory - RETURN_TRITONSERVER_ERROR_IF_ERROR(agent_model->SetLocation( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, temp_dir_cstr)); - break; - } - default: - break; - } - return nullptr; // success - }; - - model_fini_fn_ = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - auto agent_model = reinterpret_cast(model); - RETURN_TRITONSERVER_ERROR_IF_ERROR(agent_model->DeleteMutableLocation()); - return nullptr; // success - }; - } -}; - -Status -CreateAgentModelListWithLoadAction( - const inference::ModelConfig& original_model_config, - const std::string& original_model_path, - std::shared_ptr* agent_model_list) -{ - if (original_model_config.has_model_repository_agents()) { - // Trick to append user specified repo agent on top of internal ones - std::shared_ptr lagent_model_list; - if (*agent_model_list != nullptr) { - lagent_model_list = std::move(*agent_model_list); - } else { - lagent_model_list.reset(new TritonRepoAgentModelList()); - } - - FileSystemType filesystem_type; - RETURN_IF_ERROR(GetFileSystemType(original_model_path, &filesystem_type)); - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - if (filesystem_type != FileSystemType::LOCAL) { - artifact_type = TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM; - } - const char* location = original_model_path.c_str(); - inference::ModelConfig model_config = original_model_config; - for (const auto& agent_config : - original_model_config.model_repository_agents().agents()) { - std::shared_ptr agent; - RETURN_IF_ERROR( - TritonRepoAgentManager::CreateAgent(agent_config.name(), &agent)); - TritonRepoAgent::Parameters agent_params; - for (const auto& parameter : agent_config.parameters()) { - agent_params.emplace_back(parameter.first, parameter.second); - } - std::unique_ptr agent_model; - if (lagent_model_list->Size() != 0) { - lagent_model_list->Back()->Location(&artifact_type, &location); - const auto config_path = JoinPath({location, kModelConfigPbTxt}); - if (!ReadTextProto(config_path, &model_config).IsOk()) { - model_config.Clear(); - } - } - 
RETURN_IF_ERROR(TritonRepoAgentModel::Create( - artifact_type, location, model_config, agent, agent_params, - &agent_model)); - RETURN_IF_ERROR(agent_model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD)); - lagent_model_list->AddAgentModel(std::move(agent_model)); - } - *agent_model_list = std::move(lagent_model_list); - } - return Status::Success; -} - -int64_t -GetModifiedTime(const std::string& path) -{ - // If there is an error in any step the fall-back default - // modification time is 0. This means that in error cases 'path' - // will show as not modified. This is the safe fall-back to avoid - // assuming a model is constantly being modified. - bool path_is_dir; - Status status = IsDirectory(path, &path_is_dir); - if (!status.IsOk()) { - LOG_ERROR << "Failed to determine modification time for '" << path - << "': " << status.AsString(); - return 0; - } - - // If 'path' is a file return its mtime. Otherwise, using the modification - // time of the directory as baseline in case of file deletion - int64_t mtime = 0; - status = FileModificationTime(path, &mtime); - if (!status.IsOk()) { - LOG_ERROR << "Failed to determine modification time for '" << path - << "': " << status.AsString(); - return 0; - } - if (!path_is_dir) { - return mtime; - } - - // 'path' is a directory. Return the most recent mtime of the - // contents of the directory. - std::set contents; - status = GetDirectoryContents(path, &contents); - if (!status.IsOk()) { - LOG_ERROR << "Failed to determine modification time for '" << path - << "': " << status.AsString(); - return 0; - } - - for (const auto& child : contents) { - const auto full_path = JoinPath({path, child}); - mtime = std::max(mtime, GetModifiedTime(full_path)); - } - - return mtime; -} -// Return true if any file in the subdirectory root at 'path' has been -// modified more recently than 'last'. Return the most-recent modified -// time in 'last'. -bool -IsModified(const std::string& path, int64_t* last_ns) -{ - const int64_t repo_ns = GetModifiedTime(path); - bool modified = repo_ns > *last_ns; - *last_ns = repo_ns; - return modified; -} - -} // namespace - -struct ModelRepositoryManager::ModelInfo { - ModelInfo( - const int64_t mtime_nsec, const int64_t prev_mtime_ns, - const std::string& model_path) - : mtime_nsec_(mtime_nsec), prev_mtime_ns_(prev_mtime_ns), - explicitly_load_(true), model_path_(model_path), - is_config_provided_(false) - { - } - ModelInfo() - : mtime_nsec_(0), prev_mtime_ns_(0), explicitly_load_(true), - is_config_provided_(false) - { - } - int64_t mtime_nsec_; - int64_t prev_mtime_ns_; - bool explicitly_load_; - inference::ModelConfig model_config_; - std::string model_path_; - // Temporary location to hold agent model list before creating the model - // the ownership must transfer to ModelLifeCycle to ensure - // the agent model life cycle is handled properly. 
- std::shared_ptr agent_model_list_; - bool is_config_provided_; -}; - -ModelRepositoryManager::ModelRepositoryManager( - const std::set& repository_paths, const bool autofill, - const bool polling_enabled, const bool model_control_enabled, - const double min_compute_capability, - std::unique_ptr life_cycle) - : repository_paths_(repository_paths), autofill_(autofill), - polling_enabled_(polling_enabled), - model_control_enabled_(model_control_enabled), - min_compute_capability_(min_compute_capability), - model_life_cycle_(std::move(life_cycle)) -{ -} - -ModelRepositoryManager::~ModelRepositoryManager() {} - -Status -ModelRepositoryManager::Create( - InferenceServer* server, const std::string& server_version, - const std::set& repository_paths, - const std::set& startup_models, const bool strict_model_config, - const bool polling_enabled, const bool model_control_enabled, - const ModelLifeCycleOptions& life_cycle_options, - std::unique_ptr* model_repository_manager) -{ - // The rest only matters if repository path is valid directory - for (const auto& path : repository_paths) { - bool path_is_dir; - RETURN_IF_ERROR(IsDirectory(path, &path_is_dir)); - if (!path_is_dir) { - return Status( - Status::Code::INVALID_ARG, - "repository path is not a valid directory"); - } - } - - if (polling_enabled && model_control_enabled) { - return Status( - Status::Code::INVALID_ARG, - "cannot enable both polling and explicit model control"); - } - - std::unique_ptr life_cycle; - RETURN_IF_ERROR( - ModelLifeCycle::Create(server, life_cycle_options, &life_cycle)); - - // Not setting the smart pointer directly to simplify clean up - std::unique_ptr local_manager( - new ModelRepositoryManager( - repository_paths, !strict_model_config, polling_enabled, - model_control_enabled, life_cycle_options.min_compute_capability_, - std::move(life_cycle))); - *model_repository_manager = std::move(local_manager); - - // Support loading all models on startup in explicit model control mode with - // special startup_model name "*". This does not imply support for pattern - // matching in model names. - bool load_all_models_on_startup = false; - if ((startup_models.find("*") != startup_models.end()) && - model_control_enabled) { - if (startup_models.size() > 1) { - return Status( - Status::Code::INVALID_ARG, - "Wildcard model name '*' must be the ONLY startup model " - "if specified at all."); - } - - load_all_models_on_startup = true; - } - - bool all_models_polled = true; - if (!model_control_enabled || load_all_models_on_startup) { - // only error happens before model load / unload will be return - // model loading / unloading error will be printed but ignored - RETURN_IF_ERROR( - (*model_repository_manager)->PollAndUpdateInternal(&all_models_polled)); - } else { - // Load each specified startup_model - std::unordered_map> - models; - for (const auto& model_name : startup_models) { - models[model_name]; - } - RETURN_IF_ERROR( - (*model_repository_manager) - ->LoadUnloadModels( - models, ActionType::LOAD, false, &all_models_polled)); - } - - - if (!all_models_polled) { - return Status(Status::Code::INTERNAL, "failed to load all models"); - } - // Some models may failed to be loaded after model manager is created, - // return proper error and let function caller decide whether to proceed. 
- for (const auto& model : (*model_repository_manager)->infos_) {
- const auto version_states =
- (*model_repository_manager)
- ->model_life_cycle_->VersionStates(model.first);
- // Return general error message, detail of each model's loading state
- // is logged separately.
- if (version_states.empty()) {
- return Status(Status::Code::INTERNAL, "failed to load all models");
- }
- for (const auto& state : version_states) {
- if (state.second.first != ModelReadyState::READY) {
- return Status(Status::Code::INTERNAL, "failed to load all models");
- }
- }
- }
-
- return Status::Success;
-}
-
-Status
-ModelRepositoryManager::PollAndUpdate()
-{
- if (!polling_enabled_) {
- return Status(Status::Code::UNAVAILABLE, "polling is disabled");
- }
-
- bool all_models_polled;
- return PollAndUpdateInternal(&all_models_polled);
-}
-
-Status
-ModelRepositoryManager::PollAndUpdateInternal(bool* all_models_polled)
-{
- // Serialize all operations that change model state
- std::lock_guard<std::mutex> lock(poll_mu_);
-
- std::set<std::string> added, deleted, modified, unmodified;
-
- // We don't modify 'infos_' in place to minimize how long we need to
- // hold the lock and also prevent any partial changes if an error occurs
- // during processing.
- ModelInfoMap new_infos;
-
- // Each subdirectory of repository path is a model directory from
- // which we read the model configuration.
- std::unordered_map<std::string, std::vector<const InferenceParameter*>>
- subdirs;
- RETURN_IF_ERROR(Poll(
- subdirs, &added, &deleted, &modified, &unmodified, &new_infos,
- all_models_polled));
-
- // Anything in 'infos_' that is not in "added", "modified", or
- // "unmodified" is deleted.
- for (const auto& pr : infos_) {
- if ((added.find(pr.first) == added.end()) &&
- (modified.find(pr.first) == modified.end()) &&
- (unmodified.find(pr.first) == unmodified.end())) {
- deleted.insert(pr.first);
- }
- }
-
- // Nothing to do if no model adds, deletes or modifies.
- if (added.empty() && deleted.empty() && modified.empty()) { - return Status::Success; - } - - infos_.swap(new_infos); - - UpdateDependencyGraph(added, deleted, modified); - - for (const auto& name : deleted) { - model_life_cycle_->AsyncUnload(name); - } - - // model loading / unloading error will be printed but ignored - LoadModelByDependency(); - - return Status::Success; -} - -std::map -ModelRepositoryManager::LoadModelByDependency() -{ - std::map res; - struct ModelState { - ModelState(DependencyNode* node) : node_(node), status_(Status::Success) {} - DependencyNode* node_; - Status status_; - std::promise ready_; - }; - NodeSet loaded_models; - auto set_pair = ModelsToLoadUnload(loaded_models); - // Loop until all model are loaded / unloaded - while ((!set_pair.first.empty()) || (!set_pair.second.empty())) { - loaded_models.clear(); - // Unload invalid models first - for (auto& invalid_model : set_pair.second) { - model_life_cycle_->AsyncUnload(invalid_model->model_name_); - LOG_ERROR << invalid_model->status_.AsString(); - invalid_model->loaded_versions_ = std::set(); - loaded_models.emplace(invalid_model); - } - // load valid models and wait for load results - std::vector> model_states; - for (auto& valid_model : set_pair.first) { - model_states.emplace_back(new ModelState(valid_model)); - auto model_state = model_states.back().get(); - const auto itr = infos_.find(valid_model->model_name_); - auto status = model_life_cycle_->AsyncLoad( - valid_model->model_name_, itr->second->model_path_, - valid_model->model_config_, itr->second->is_config_provided_, - itr->second->agent_model_list_, [model_state](Status load_status) { - model_state->status_ = load_status; - model_state->ready_.set_value(); - }); - if (!status.IsOk()) { - model_state->status_ = status; - model_state->ready_.set_value(); - LOG_ERROR << "failed to load model '" << valid_model->model_name_ - << "': " << status.Message(); - } - loaded_models.emplace(valid_model); - } - for (auto& model_state : model_states) { - model_state->ready_.get_future().wait(); - res[model_state->node_->model_name_] = model_state->status_; - const auto version_state = - model_life_cycle_->VersionStates(model_state->node_->model_name_); - model_state->node_->loaded_versions_.clear(); - for (const auto& vs : version_state) { - if (vs.second.first == ModelReadyState::READY) { - model_state->node_->loaded_versions_.emplace(vs.first); - } - } - // If the model failed to load, should revert the timestamp to - // ensure the next load request will attempt to load the model again - // for operation consistency. 
- if (!model_state->status_.IsOk()) { - auto& model_info = infos_.find(model_state->node_->model_name_)->second; - model_info->mtime_nsec_ = model_info->prev_mtime_ns_; - } - } - set_pair = ModelsToLoadUnload(loaded_models); - } - // Clear temporary stored agent model list after all loads are triggerred - for (auto& info : infos_) { - info.second->agent_model_list_.reset(); - } - return res; -} - -Status -ModelRepositoryManager::LoadUnloadModel( - const std::unordered_map< - std::string, std::vector>& models, - const ActionType type, const bool unload_dependents) -{ - if (!model_control_enabled_) { - return Status( - Status::Code::UNAVAILABLE, - "explicit model load / unload is not allowed if polling is enabled"); - } - - if (models.size() > 1) { - return Status( - Status::Code::UNSUPPORTED, - "explicit load / unload multiple models is not currently supported"); - } - - // Serialize all operations that change model state - std::lock_guard lock(poll_mu_); - - bool polled = true; - RETURN_IF_ERROR(LoadUnloadModels(models, type, unload_dependents, &polled)); - // Check if model is loaded / unloaded properly - const auto& model_name = models.begin()->first; - if (!polled) { - return Status( - Status::Code::INTERNAL, "failed to load '" + model_name + - "', failed to poll from model repository"); - } - - const auto version_states = model_life_cycle_->VersionStates(model_name); - if (type == ActionType::LOAD) { - if (version_states.empty()) { - return Status( - Status::Code::INTERNAL, - "failed to load '" + model_name + "', no version is available"); - } - auto it = infos_.find(model_name); - if (it == infos_.end()) { - return Status( - Status::Code::INTERNAL, - "failed to load '" + model_name + - "', failed to poll from model repository"); - } - } else { - std::string ready_version_str; - for (const auto& version_state : version_states) { - if (version_state.second.first == ModelReadyState::READY) { - ready_version_str += std::to_string(version_state.first); - ready_version_str += ","; - } - } - if (!ready_version_str.empty()) { - ready_version_str.pop_back(); - return Status( - Status::Code::INTERNAL, - "failed to unload '" + model_name + - "', versions that are still available: " + ready_version_str); - } - } - - return Status::Success; -} - -Status -ModelRepositoryManager::LoadUnloadModels( - const std::unordered_map< - std::string, std::vector>& models, - const ActionType type, const bool unload_dependents, - bool* all_models_polled) -{ - auto status = Status::Success; - *all_models_polled = true; - // Update ModelInfo related to file system accordingly - std::set added, deleted, modified, unmodified; - { - if (type == ActionType::UNLOAD) { - for (const auto& model : models) { - deleted.insert(model.first); - } - } - // ActionType::LOAD and in model control mode - else { - std::set checked_models; - auto current_models = models; - for (const auto& model : models) { - checked_models.emplace(model.first); - } - - ModelInfoMap new_infos; -#ifdef TRITON_ENABLE_ENSEMBLE - bool first_iteration = true; -#endif // TRITON_ENABLE_ENSEMBLE - while (!current_models.empty()) { - bool polled = true; - RETURN_IF_ERROR(Poll( - current_models, &added, &deleted, &modified, &unmodified, - &new_infos, &polled)); - *all_models_polled &= polled; - - // More models should be polled if the polled models are ensembles - std::unordered_map> - next_models; -#ifdef TRITON_ENABLE_ENSEMBLE - for (const auto& model : current_models) { - auto it = new_infos.find(model.first); - // Some models may be marked as deleted and 
not in 'new_infos' - if (it != new_infos.end()) { - it->second->explicitly_load_ = first_iteration; - const auto& config = it->second->model_config_; - if (config.has_ensemble_scheduling()) { - for (const auto& step : config.ensemble_scheduling().step()) { - bool need_poll = - checked_models.emplace(step.model_name()).second; - if (need_poll) { - next_models[step.model_name()]; - } - } - } - } - } - first_iteration = false; -#endif // TRITON_ENABLE_ENSEMBLE - current_models.swap(next_models); - } - - // Only update the infos when all validation is completed - for (const auto& model_name : added) { - auto nitr = new_infos.find(model_name); - infos_.emplace(model_name, std::move(nitr->second)); - } - for (const auto& model_name : modified) { - auto nitr = new_infos.find(model_name); - auto itr = infos_.find(model_name); - itr->second = std::move(nitr->second); - } - } - } - std::set deleted_dependents; - - // Update dependency graph and load - UpdateDependencyGraph( - added, deleted, modified, - unload_dependents ? &deleted_dependents : nullptr); - - // The models are in 'deleted' either when they are asked to be unloaded or - // they are not found / are duplicated across all model repositories. - // In all cases, should unload them and remove from 'infos_' explicitly. - for (const auto& name : (unload_dependents ? deleted_dependents : deleted)) { - infos_.erase(name); - model_life_cycle_->AsyncUnload(name); - } - - // load / unload the models affected, and check the load status of - // the requested models - const auto& load_status = LoadModelByDependency(); - if (status.IsOk() && (type == ActionType::LOAD)) { - std::string load_error_message = ""; - for (const auto& model : models) { - auto it = load_status.find(model.first); - // If 'model.first' not in load status, it means the (re-)load is not - // necessary because there is no change in the model's directory - if ((it != load_status.end()) && !it->second.IsOk()) { - load_error_message += - ("load failed for model '" + model.first + - "': " + it->second.Message() + "\n"); - } - } - if (!load_error_message.empty()) { - status = Status(Status::Code::INVALID_ARG, load_error_message); - } - } - - return status; -} - -Status -ModelRepositoryManager::UnloadAllModels() -{ - Status status; - for (const auto& name_info : infos_) { - Status unload_status = model_life_cycle_->AsyncUnload(name_info.first); - if (!unload_status.IsOk()) { - status = Status( - unload_status.ErrorCode(), - "Failed to gracefully unload models: " + unload_status.Message()); - } - } - return Status::Success; -} - -Status -ModelRepositoryManager::StopAllModels() -{ - return model_life_cycle_->StopAllModels(); -} - -const std::set> -ModelRepositoryManager::InflightStatus() -{ - return model_life_cycle_->InflightStatus(); -} - -const ModelStateMap -ModelRepositoryManager::LiveModelStates(bool strict_readiness) -{ - return model_life_cycle_->LiveModelStates(strict_readiness); -} - -const ModelStateMap -ModelRepositoryManager::ModelStates() -{ - return model_life_cycle_->ModelStates(); -} - -const VersionStateMap -ModelRepositoryManager::VersionStates(const std::string& model_name) -{ - return model_life_cycle_->VersionStates(model_name); -} - -Status -ModelRepositoryManager::ModelState( - const std::string& model_name, const int64_t model_version, - ModelReadyState* state) -{ - return model_life_cycle_->ModelState(model_name, model_version, state); -} - -Status -ModelRepositoryManager::RepositoryIndex( - const bool ready_only, std::vector* index) -{ - std::set seen_models; - 
std::set duplicate_models; - for (const auto& repository_path : repository_paths_) { - // For any mapped models in this repository, save the mapping - // from their subdirectory name to model name. - std::map models_in_repo; - for (const auto& mapping_it : model_mappings_) { - if (mapping_it.second.first == repository_path) { - models_in_repo.emplace( - BaseName(mapping_it.second.second), mapping_it.first); - } - } - std::set subdirs; - RETURN_IF_ERROR(GetDirectorySubdirs(repository_path, &subdirs)); - for (const auto& subdir : subdirs) { - auto model = subdir; - auto model_it = models_in_repo.find(subdir); - if (model_it != models_in_repo.end()) { - model = model_it->second; - } - - if (seen_models.find(model) != seen_models.end()) { - duplicate_models.insert(model); - } - - seen_models.insert(model); - } - } - - ModelStateMap states = ModelStates(); - - for (const auto& model : seen_models) { - // If the same model appears in multiple repostories then show it - // as unavailable since duplicate models are not allowed to load. - if (duplicate_models.find(model) != duplicate_models.end()) { - index->emplace_back( - model, -1 /* version */, ModelReadyState::UNAVAILABLE, - MODEL_READY_REASON_DUPLICATE); - continue; - } - - // If there is any version/state/reason associated with the model - // then include that in the index. - auto sitr = states.find(model); - if (sitr == states.end()) { - if (!ready_only) { - index->emplace_back(model); - } - } else { - for (const auto& pr : sitr->second) { - if (!ready_only || (pr.second.first == ModelReadyState::READY)) { - index->emplace_back( - model, pr.first, pr.second.first, pr.second.second); - } - } - } - } - - return Status::Success; -} - -Status -ModelRepositoryManager::GetModel( - const std::string& model_name, const int64_t model_version, - std::shared_ptr* model) -{ - Status status = model_life_cycle_->GetModel(model_name, model_version, model); - if (!status.IsOk()) { - model->reset(); - status = Status( - status.ErrorCode(), "Request for unknown model: " + status.Message()); - } - return status; -} - -Status -ModelRepositoryManager::Poll( - const std::unordered_map< - std::string, std::vector>& models, - std::set* added, std::set* deleted, - std::set* modified, std::set* unmodified, - ModelInfoMap* updated_infos, bool* all_models_polled) -{ - *all_models_polled = true; - // empty path is the special case to indicate the model should be loaded - // from override file content in 'models'. - std::map model_to_path; - - // If no model is specified, poll all models in all model repositories. - // Otherwise, only poll the specified models - if (models.empty()) { - std::set duplicated_models; - for (const auto& repository_path : repository_paths_) { - std::set subdirs; - Status status = GetDirectorySubdirs(repository_path, &subdirs); - if (!status.IsOk()) { - LOG_ERROR << "failed to poll model repository '" << repository_path - << "': " << status.Message(); - *all_models_polled = false; - } else { - for (const auto& subdir : subdirs) { - if (!model_to_path - .emplace(subdir, JoinPath({repository_path, subdir})) - .second) { - duplicated_models.insert(subdir); - *all_models_polled = false; - } - } - } - } - // If the model is not unique, mark as deleted to unload it - for (const auto& model : duplicated_models) { - model_to_path.erase(model); - deleted->insert(model); - LOG_ERROR << "failed to poll model '" << model - << "': not unique across all model repositories"; - } - } - // If models are specified, this is explicit model control mode. 
- else { - for (const auto& model : models) { - // Skip repository polling if override model files - if (ModelDirectoryOverride(model.second)) { - model_to_path.emplace(model.first, ""); - continue; - } - // Check model mapping first to see if matching model to load. - bool exists = false; - auto model_it = model_mappings_.find(model.first); - if (model_it != model_mappings_.end()) { - bool exists_in_this_repo = false; - auto full_path = model_it->second.second; - Status status = FileExists(full_path, &exists_in_this_repo); - if (!status.IsOk()) { - LOG_ERROR << "failed to poll mapped path '" << full_path - << "' for model '" << model.first - << "': " << status.Message(); - *all_models_polled = false; - } - if (exists_in_this_repo) { - model_to_path.emplace(model.first, model_it->second.second); - exists = true; - } else { - LOG_ERROR << "mapped path '" << full_path - << "' does not exist for model '" << model.first << "'"; - exists = false; - } - } else { - for (const auto repository_path : repository_paths_) { - bool exists_in_this_repo = false; - const auto full_path = JoinPath({repository_path, model.first}); - Status status = FileExists(full_path, &exists_in_this_repo); - if (!status.IsOk()) { - LOG_ERROR << "failed to poll model repository '" << repository_path - << "' for model '" << model.first - << "': " << status.Message(); - *all_models_polled = false; - } else if (exists_in_this_repo) { - // Check to make sure this directory is not mapped. - // If mapped, continue to next repository path. - bool mapped = false; - for (auto const& mapping : model_mappings_) { - if (mapping.second.second == full_path) { - mapped = true; - break; - } - } - if (mapped) { - continue; - } - - auto res = model_to_path.emplace( - model.first, JoinPath({repository_path, model.first})); - if (res.second) { - exists = true; - } else { - exists = false; - model_to_path.erase(res.first); - LOG_ERROR << "failed to poll model '" << model.first - << "': not unique across all model repositories"; - break; - } - } - } - } - // For an explicitly specified model that doesn't exist, we don't mark it - // as deleted, we simply mark that we couldn't poll all models. - if (!exists) { - *all_models_polled = false; - } - } - } - - // Poll each of the models. If error happens during polling the model, - // its state will fallback to the state before the polling. - for (const auto& pair : model_to_path) { - std::unique_ptr model_info; - const auto& mit = models.find(pair.first); - static std::vector empty_params; - auto status = InitializeModelInfo( - pair.first, pair.second, - ((mit == models.end()) ? 
empty_params : mit->second), &model_info); - - const auto& iitr = infos_.find(pair.first); - const bool invalid_add = (!status.IsOk()) && (iitr == infos_.end()); - if (!invalid_add) { - const auto& ret = updated_infos->emplace(pair.first, nullptr); - if (!ret.second) { - return Status( - Status::Code::ALREADY_EXISTS, - "unexpected model info for model '" + pair.first + "'"); - } - - // Classify load state and set updated info - if (model_info == nullptr) { - ret.first->second.reset(new ModelInfo(*iitr->second)); - unmodified->insert(pair.first); - } else { - ret.first->second = std::move(model_info); - if (iitr != infos_.end()) { - modified->insert(pair.first); - } else { - added->insert(pair.first); - } - } - } - - if (!status.IsOk()) { - LOG_ERROR << "Poll failed for model directory '" << pair.first - << "': " << status.Message(); - *all_models_polled = false; - } - } - - return Status::Success; -} - -bool -ModelRepositoryManager::ModelDirectoryOverride( - const std::vector& model_params) -{ - for (const auto& param : model_params) { - if (param->Name().rfind(file_prefix, 0) == 0) { - // param name starts with prefix if user provides override file - return true; - } - } - return false; -} - -Status -ModelRepositoryManager::InitializeModelInfo( - const std::string& name, const std::string& path, - const std::vector& params, - std::unique_ptr* info) -{ - std::unique_ptr linfo(new ModelInfo()); - linfo->model_path_ = path; - - bool unmodified = false; - - const auto iitr = infos_.find(name); - // Set 'prev_mtime_ns_' if there is existing ModelInfo - if (iitr != infos_.end()) { - linfo->prev_mtime_ns_ = iitr->second->mtime_nsec_; - } else { - linfo->prev_mtime_ns_ = 0; - } - - // Set 'mtime_nsec_' and override 'model_path_' if current path is empty - // (file override is specified) - if (linfo->model_path_.empty()) { - // Need to localize the override files, use repo agent to manage - // the lifecycle of the localized files - std::shared_ptr localize_agent(new LocalizeRepoAgent()); - std::unique_ptr localize_agent_model; - RETURN_IF_ERROR(TritonRepoAgentModel::Create( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, "", inference::ModelConfig(), - localize_agent, {}, &localize_agent_model)); - - // Set agent model state so the repo agent can access the encoded files - // Using const_cast here but we are safe as the RepoAgent will not - // modify the state - localize_agent_model->SetState( - const_cast(reinterpret_cast(¶ms))); - RETURN_IF_ERROR( - localize_agent_model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD)); - - const char* location; - TRITONREPOAGENT_ArtifactType type; - RETURN_IF_ERROR(localize_agent_model->Location(&type, &location)); - - // For file override, set 'mtime_nsec_' to minimum value so that - // the next load without override will trigger re-load to undo - // the override while the local files may still be unchanged. 
- linfo->mtime_nsec_ = 0; - linfo->model_path_ = location; - linfo->agent_model_list_.reset(new TritonRepoAgentModelList()); - linfo->agent_model_list_->AddAgentModel(std::move(localize_agent_model)); - } else { - if (iitr == infos_.end()) { - linfo->mtime_nsec_ = GetModifiedTime(std::string(linfo->model_path_)); - } else { - // Check the current timestamps to determine if model actually has been - // modified - linfo->mtime_nsec_ = linfo->prev_mtime_ns_; - unmodified = - !IsModified(std::string(linfo->model_path_), &linfo->mtime_nsec_); - } - } - - // Set 'model_config_' - bool parsed_config = false; - // Check if there is config override - for (const auto& override_parameter : params) { - if ((override_parameter->Name() == "config") && - (override_parameter->Type() == TRITONSERVER_PARAMETER_STRING)) { - // When override happens, set 'mtime_nsec_' to minimum value so that - // the next load without override will trigger re-load to undo - // the override while the local files may still be unchanged. - linfo->mtime_nsec_ = 0; - unmodified = false; - - const std::string& override_config = override_parameter->ValueString(); - auto err = JsonToModelConfig( - override_config, 1 /* config_version */, &linfo->model_config_); - if (!err.IsOk()) { - return Status( - Status::Code::INVALID_ARG, - "Invalid config override: " + std::string(err.Message())); - } - parsed_config = true; - break; - } else if (override_parameter->Name().rfind(file_prefix, 0) != 0) { - return Status( - Status::Code::INVALID_ARG, - "Unrecognized load parameter '" + override_parameter->Name() + - "' with type '" + - TRITONSERVER_ParameterTypeString(override_parameter->Type()) + - "'"); - } - } - - // Polling model is considered unmodified by this point and can be returned - // with info == nullptr - if (unmodified) { - return Status::Success; - } - - // Create the associated repo agent models when a model is to be loaded, - // this must be done before normalizing model config as agents might - // redirect to use the model config at a different location - if (!parsed_config) { - const auto config_path = JoinPath({linfo->model_path_, kModelConfigPbTxt}); - bool model_config_exists = false; - RETURN_IF_ERROR(FileExists(config_path, &model_config_exists)); - // model config can be missing if auto fill is set - if (autofill_ && !model_config_exists) { - linfo->model_config_.Clear(); - } else { - RETURN_IF_ERROR(ReadTextProto(config_path, &linfo->model_config_)); - parsed_config = true; - } - } - if (parsed_config) { - RETURN_IF_ERROR(CreateAgentModelListWithLoadAction( - linfo->model_config_, linfo->model_path_, &linfo->agent_model_list_)); - if (linfo->agent_model_list_ != nullptr) { - // Get the latest repository path - const char* location; - TRITONREPOAGENT_ArtifactType artifact_type; - RETURN_IF_ERROR(linfo->agent_model_list_->Back()->Location( - &artifact_type, &location)); - auto latest_path = std::string(location); - linfo->model_path_ = latest_path; - } - } - linfo->is_config_provided_ = parsed_config; - - // Try to automatically generate missing parts of the model - // configuration (autofill) that don't require model detail - RETURN_IF_ERROR(GetNormalizedModelConfig( - name, linfo->model_path_, min_compute_capability_, - &linfo->model_config_)); - - // Note that the model inputs and outputs are not validated until - // the model model is intialized as they may not be auto-completed - // until model is intialized. 
- RETURN_IF_ERROR( - ValidateModelConfig(linfo->model_config_, min_compute_capability_)); - if (!autofill_) { - RETURN_IF_ERROR(ValidateModelIOConfig(linfo->model_config_)); - } - - // If the model is mapped, update its config name based on the - // mapping. - if (model_mappings_.find(name) != model_mappings_.end()) { - linfo->model_config_.set_name(name); - } else { - // If there is no model mapping, make sure the name of the model - // matches the name of the directory. This is a somewhat arbitrary - // requirement but seems like good practice to require it of the user. - // It also acts as a check to make sure we don't have two different - // models with the same name. - if (linfo->model_config_.name() != name) { - return Status( - Status::Code::INVALID_ARG, - "unexpected directory name '" + name + "' for model '" + - linfo->model_config_.name() + - "', directory name must equal model name"); - } - } - - *info = std::move(linfo); - return Status::Success; -} - -Status -ModelRepositoryManager::UpdateDependencyGraph( - const std::set& added, const std::set& deleted, - const std::set& modified, - std::set* deleted_dependents) -{ - // update dependency graph, if the state of a node is changed, all its - // downstreams will be affected - - // deleted, drop from dependency_graph, add to missing_nodes if downstreams is - // not empty affected_nodes are all ensembles as only ensembles are depending - // on other models - std::set affected_nodes; - std::set updated_nodes; - std::set current_deleted = deleted; - while (!current_deleted.empty()) { - std::set next_deleted; - for (const auto& model_name : current_deleted) { - auto it = dependency_graph_.find(model_name); - if (it != dependency_graph_.end()) { - // remove this node from its upstreams - for (auto& upstream : it->second->upstreams_) { - upstream.first->downstreams_.erase(it->second.get()); - // Check if the upstream should be removed as well - if ((deleted_dependents != nullptr) && - (upstream.first->downstreams_.empty()) && - (!upstream.first->explicitly_load_)) { - next_deleted.emplace(upstream.first->model_name_); - } - } - it->second->upstreams_.clear(); - - if (!it->second->downstreams_.empty()) { - UncheckDownstream(&it->second->downstreams_, &affected_nodes); - // mark this node as missing upstream in its downstreams - for (auto& downstream : it->second->downstreams_) { - downstream->missing_upstreams_.emplace(it->second.get()); - } - missing_nodes_.emplace( - std::make_pair(model_name, std::move(it->second))); - } - - // Make sure deleted node will not be in affected nodes - affected_nodes.erase(it->second.get()); - dependency_graph_.erase(it); - } - if (deleted_dependents != nullptr) { - deleted_dependents->emplace(model_name); - } - } - current_deleted.swap(next_deleted); - } - - // modified, invalidate (uncheck) all downstreams - for (const auto& model_name : modified) { - auto it = dependency_graph_.find(model_name); - if (it != dependency_graph_.end()) { - UncheckDownstream(&it->second->downstreams_, &affected_nodes); - ModelInfo* info = nullptr; - GetModelInfo(model_name, &info); - it->second->model_config_ = info->model_config_; - it->second->explicitly_load_ = info->explicitly_load_; - // remove this node from its upstream node - for (auto& upstream : it->second->upstreams_) { - upstream.first->downstreams_.erase(it->second.get()); - } - it->second->upstreams_.clear(); - it->second->checked_ = false; - it->second->status_ = Status::Success; - updated_nodes.emplace(it->second.get()); - } - } - - // added, add to 
dependency_graph, if in missing_node, invalidate (uncheck) - // and associate all downstreams, remove from missing_node - for (const auto& model_name : added) { - std::unique_ptr added_node; - auto it = missing_nodes_.find(model_name); - if (it != missing_nodes_.end()) { - UncheckDownstream(&it->second->downstreams_, &affected_nodes); - // remove this node from missing upstream node in its downstream nodes - for (auto& downstream : it->second->downstreams_) { - downstream->missing_upstreams_.erase(it->second.get()); - } - - it->second->checked_ = false; - added_node = std::move(it->second); - missing_nodes_.erase(it); - } else { - // Right now, nothing is going to be filled until validation - added_node.reset(new DependencyNode(model_name)); - } - ModelInfo* info = nullptr; - GetModelInfo(model_name, &info); - added_node->model_config_ = info->model_config_; - added_node->explicitly_load_ = info->explicitly_load_; - updated_nodes.emplace(added_node.get()); - dependency_graph_.emplace( - std::make_pair(model_name, std::move(added_node))); - } - - auto& affected_ensembles = affected_nodes; - for (auto& updated_node : updated_nodes) { - bool is_ensemble = ConnectDependencyGraph(updated_node); - if (is_ensemble) { - affected_ensembles.emplace(updated_node); - } - } - -#ifdef TRITON_ENABLE_ENSEMBLE - // After the dependency graph is updated, check ensemble dependencies - for (auto& ensemble : affected_ensembles) { - if (ensemble->status_.IsOk()) { - if (!ensemble->missing_upstreams_.empty()) { - std::string name_list; - for (auto it = ensemble->missing_upstreams_.begin(); - it != ensemble->missing_upstreams_.end(); it++) { - if (it != ensemble->missing_upstreams_.begin()) { - name_list += ", "; - } - name_list += (*it)->model_name_; - } - ensemble->status_ = Status( - Status::Code::INVALID_ARG, - "ensemble " + ensemble->model_name_ + - " contains models that are not available: " + name_list); - } else { - ensemble->status_ = CircularcyCheck(ensemble, ensemble); - } - } - } -#endif // TRITON_ENABLE_ENSEMBLE - return Status::Success; -} - -Status -ModelRepositoryManager::RegisterModelRepository( - const std::string& repository, - const std::unordered_map& model_mapping) -{ - if (!model_control_enabled_) { - return Status( - Status::Code::UNSUPPORTED, - "repository registration is not allowed if model control mode is not " - "EXPLICIT"); - } - bool is_directory = false; - auto status = IsDirectory(repository, &is_directory); - if (!status.IsOk() || !is_directory) { - return Status( - Status::Code::INVALID_ARG, (std::string("failed to register '") + - repository + "', repository not found") - .c_str()); - } - - { - // Serialize all operations that change model state - std::lock_guard lock(poll_mu_); - - // Check repository and mapped models do not yet exist. 
- if (repository_paths_.find(repository) != repository_paths_.end()) { - return Status( - Status::Code::ALREADY_EXISTS, - "model repository '" + repository + "' has already been registered"); - } - - for (const auto& mapping : model_mapping) { - if (model_mappings_.find(mapping.first) != model_mappings_.end()) { - return Status( - Status::Code::ALREADY_EXISTS, - (std::string("failed to register '") + mapping.first + - "', there is a conflicting mapping for '" + - std::string(mapping.first) + "'") - .c_str()); - } - } - - repository_paths_.emplace(repository); - for (const auto& mapping : model_mapping) { - model_mappings_.emplace( - mapping.first, - std::make_pair(repository, JoinPath({repository, mapping.second}))); - } - } - - LOG_INFO << "Model repository registered: " << repository; - return Status::Success; -} - -Status -ModelRepositoryManager::UnregisterModelRepository(const std::string& repository) -{ - if (!model_control_enabled_) { - return Status( - Status::Code::UNSUPPORTED, - "repository unregistration is not allowed if model control mode is not " - "EXPLICIT"); - } - { - std::lock_guard lock(poll_mu_); - if (repository_paths_.erase(repository) != 1) { - return Status( - Status::Code::INVALID_ARG, - "failed to unregister '" + repository + "', repository not found"); - } - - std::set models_to_delete; - for (auto const& mapping : model_mappings_) { - if (mapping.second.first == repository) { - models_to_delete.insert(mapping.first); - } - } - for (auto const& model : models_to_delete) { - model_mappings_.erase(model); - } - } - - LOG_INFO << "Model repository unregistered: " << repository; - return Status::Success; -} - -Status -ModelRepositoryManager::CircularcyCheck( - DependencyNode* current_node, const DependencyNode* start_node) -{ - for (auto& downstream : current_node->downstreams_) { - if (downstream->model_name_ == start_node->model_name_) { - return Status( - Status::Code::INVALID_ARG, - "circular dependency between ensembles: " + start_node->model_name_ + - " -> ... 
-> " + current_node->model_name_ + " -> " + - start_node->model_name_); - } else { - const auto status = CircularcyCheck(downstream, start_node); - if (!status.IsOk() && current_node->status_.IsOk()) { - current_node->status_ = status; - return status; - } - } - } - return Status::Success; -} - -void -ModelRepositoryManager::UncheckDownstream( - NodeSet* downstreams, NodeSet* updated_nodes) -{ - // Mark downstream nodes as unchecked recursively - for (auto& node : *downstreams) { - if (node->checked_) { - node->checked_ = false; - node->status_ = Status::Success; - UncheckDownstream(&node->downstreams_, updated_nodes); - updated_nodes->emplace(node); - } - } -} - -bool -ModelRepositoryManager::ConnectDependencyGraph(DependencyNode* updated_node) -{ - // Check the node's model config to determine if it depends on other models - // and if those models are present - updated_node->upstreams_.clear(); - updated_node->missing_upstreams_.clear(); - if (updated_node->model_config_.has_ensemble_scheduling()) { - for (const auto& step : - updated_node->model_config_.ensemble_scheduling().step()) { - DependencyNode* upstream_node = nullptr; - const auto& model_name = step.model_name(); - auto dit = dependency_graph_.find(model_name); - if (dit == dependency_graph_.end()) { - auto mit = missing_nodes_.find(model_name); - if (mit == missing_nodes_.end()) { - std::unique_ptr node(new DependencyNode(model_name)); - updated_node->missing_upstreams_.emplace(node.get()); - mit = missing_nodes_.emplace(model_name, std::move(node)).first; - } - // Add the node to missing node's downstream so that when the missing - // node is added, the downstreams can be found easily. - mit->second->downstreams_.emplace(updated_node); - upstream_node = mit->second.get(); - } else { - dit->second->downstreams_.emplace(updated_node); - upstream_node = dit->second.get(); - } - auto res = updated_node->upstreams_.emplace( - upstream_node, std::set({step.model_version()})); - // If map insertion doesn't happen, the same model is required in - // different step, insert the version to existing required version set. 
- if (!res.second) { - res.first->second.insert(step.model_version()); - } - } - return true; - } - return false; -} - -Status -ModelRepositoryManager::GetModelInfo( - const std::string& name, ModelInfo** model_info) -{ - const auto itr = infos_.find(name); - if (itr == infos_.end()) { - return Status( - Status::Code::NOT_FOUND, "no configuration for model '" + name + "'"); - } - - *model_info = itr->second.get(); - return Status::Success; -} - -std::pair -ModelRepositoryManager::ModelsToLoadUnload(const NodeSet& loaded_models) -{ - // - std::pair res; - // first call to this function - if (loaded_models.empty()) { - for (auto& pair : dependency_graph_) { - auto node = pair.second.get(); - // only care about nodes that are affected by the update - if (!node->checked_) { - if (CheckNode(node)) { - if (node->status_.IsOk()) { - res.first.emplace(node); - } else { - res.second.emplace(node); - } - } - } - } - } else { - for (const auto& model : loaded_models) { - for (auto node : model->downstreams_) { - // only care about nodes that are affected by the update - if (!node->checked_) { - if (CheckNode(node)) { - if (node->status_.IsOk()) { - res.first.emplace(node); - } else { - res.second.emplace(node); - } - } - } - } - } - } - for (auto& node : res.first) { - node->checked_ = true; - } - for (auto& node : res.second) { - node->checked_ = true; - } - return res; -} - -bool -ModelRepositoryManager::CheckNode(DependencyNode* node) -{ - bool node_ready = true; - // if the node is in invalid status, mark as ready as we know - // it should not be loaded - if (node->status_.IsOk()) { - for (auto& upstream : node->upstreams_) { - if (!upstream.first->checked_) { - node_ready = false; - break; - } - if (!upstream.first->status_.IsOk()) { - node->status_ = Status( - Status::Code::INVALID_ARG, - "ensemble '" + node->model_name_ + "' depends on '" + - upstream.first->model_name_ + "' which is not valid"); - } else if (upstream.first->loaded_versions_.empty()) { - node->status_ = Status( - Status::Code::INVALID_ARG, - "ensemble '" + node->model_name_ + "' depends on '" + - upstream.first->model_name_ + "' which has no loaded version"); - } else { - for (const auto& required_version : upstream.second) { - if (required_version == -1) { - continue; - } - - auto it = upstream.first->loaded_versions_.find(required_version); - if (it == upstream.first->loaded_versions_.end()) { - node->status_ = Status( - Status::Code::INVALID_ARG, - "ensemble '" + node->model_name_ + "' depends on '" + - upstream.first->model_name_ + "' whose required version " + - std::to_string(required_version) + " is not loaded"); - } - } - } - if (!node->status_.IsOk()) { - break; - } - } -#ifdef TRITON_ENABLE_ENSEMBLE - // Validate ensemble config if the node is ready. By this point, the - // depending models are loaded and their configs are completed - if (node_ready && node->status_.IsOk()) { - node->status_ = ValidateEnsembleConfig(this, node); - } -#endif // TRITON_ENABLE_ENSEMBLE - } - return node_ready; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/model_repository_manager.h b/3rdparty/core-r22.12/src/model_repository_manager.h deleted file mode 100644 index bd06723801da4dd5e01bd800f1545fecb1a26e25..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/model_repository_manager.h +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#pragma once - -#include -#include -#include -#include -#include "infer_parameter.h" -#include "model_config.pb.h" -#include "model_lifecycle.h" -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -class InferenceServer; -class Model; - -// [FIXME] should have separated load / unload functions for clarity -enum ActionType { NO_ACTION, LOAD, UNLOAD }; - -/// Predefined reason strings -#define MODEL_READY_REASON_DUPLICATE "model appears in two or more repositories" - -/// An object to manage the model repository active in the server. -class ModelRepositoryManager { - public: - // Index information for a model. - struct ModelIndex { - ModelIndex(const std::string& n) - : name_only_(true), name_(n), version_(-1), - state_(ModelReadyState::UNKNOWN) - { - } - ModelIndex( - const std::string& n, const int64_t v, const ModelReadyState s, - const std::string& r) - : name_only_(false), name_(n), version_(v), state_(s), reason_(r) - { - } - const bool name_only_; - const std::string name_; - const int64_t version_; - const ModelReadyState state_; - const std::string reason_; - }; - - /// A basic unit in dependency graph that records the models seen by the model - /// repository manager. - struct DependencyNode { - DependencyNode(const std::string& model_name) - : model_name_(model_name), status_(Status::Success), checked_(false) - { - } - - std::string model_name_; - Status status_; - bool checked_; - bool explicitly_load_; - inference::ModelConfig model_config_; - std::set loaded_versions_; - std::set missing_upstreams_; - std::unordered_map> upstreams_; - std::set downstreams_; - }; - - ~ModelRepositoryManager(); - - /// Create a manager for a repository. - /// \param server The pointer to the inference server. - /// \param server_version The version of the inference server. - /// \param repository_paths A set of file-system paths of the repositories. - /// \param startup_models A set of models to be loaded at startup - /// if model control is enabled. 
- /// \param strict_model_config If false attempt to autofill missing required - /// information in each model configuration. - /// \param polling_enabled If true, then PollAndUpdate() is allowed. - /// Otherwise, it is not allowed. - /// \param model_control_enabled If true, then LoadUnloadModel() is allowed - /// and the models in the model repository will not be loaded at startup. - /// Otherwise, LoadUnloadModel() is not allowed and the models will be loaded. - /// Cannot be set to true if polling_enabled is true. - /// \param life_cycle_options The options to configure ModelLifeCycle. - /// \param model_repository_manager Return the model repository manager. - /// \return The error status. - static Status Create( - InferenceServer* server, const std::string& server_version, - const std::set& repository_paths, - const std::set& startup_models, - const bool strict_model_config, const bool polling_enabled, - const bool model_control_enabled, - const ModelLifeCycleOptions& life_cycle_options, - std::unique_ptr* model_repository_manager); - - /// Poll the model repository to determine the new set of models and - /// compare with the current set. And serve the new set of models based - /// on their version policy. - Status PollAndUpdate(); - - /// Load or unload a specified model. - /// \param models The models and the parameters to be loaded or unloaded - /// \param type The type action to be performed. If the action is LOAD and - /// the model has been loaded, the model will be re-loaded. - /// \return error status. Return "NOT_FOUND" if it tries to load - /// a non-existing model or if it tries to unload a model that hasn't been - /// loaded. - Status LoadUnloadModel( - const std::unordered_map< - std::string, std::vector>& models, - const ActionType type, const bool unload_dependents); - - /// Unload all models. This function should be called before shutting down - /// the model repository manager. - /// \return error status. - Status UnloadAllModels(); - - /// Instruct all models to stop accepting new inference requests. However, - /// the models are still capable of processing inference requests - /// if the model considers them as part of the in-flight inference. - /// \return error status. - Status StopAllModels(); - - /// \return the number of in-flight inferences for the all versions of all - /// models. The set element will be a tuple of . Note that a model version will not be included - /// if it doesn't have in-flight inferences. - const std::set> InflightStatus(); - - /// \param strict_readiness If true, only models that have at least one - /// ready version will be considered as live. Otherwise, the models that - /// have loading / unloading versions will also be live. - /// \return the state of all versions of all live models. - const ModelStateMap LiveModelStates(bool strict_readiness = false); - - /// \return the state of all versions of all models that have every - /// been (attempted) loaded over the lifetime of the server. - const ModelStateMap ModelStates(); - - /// \return the states of all versions of a specific model. - const VersionStateMap VersionStates(const std::string& model_name); - - /// \return the ready-state of a specific model version. - Status ModelState( - const std::string& model_name, const int64_t model_version, - ModelReadyState* state); - - /// Get the index of all models in all repositories. - /// \param ready_only If true return only index of models that are ready. - /// \param index Returns the index. - /// \return error status. 
- Status RepositoryIndex(const bool ready_only, std::vector* index); - - /// Obtain the specified model. - /// \param model_name The name of the model. - /// \param model_version The version of the model. - /// \param model Return the model object. - /// \return error status. - Status GetModel( - const std::string& model_name, const int64_t model_version, - std::shared_ptr* model); - - // Register model repository path. - /// \param repository Path to model repository. - /// \param model_mapping Mapping with (overridden) model name as key, subdir - /// name as value. - /// \return error status - Status RegisterModelRepository( - const std::string& repository, - const std::unordered_map& model_mapping); - - // Unregister model repository path. - /// \param repository Path to model repository. - /// \return error status - Status UnregisterModelRepository(const std::string& repository); - - private: - struct ModelInfo; - - // Map from model name to information about the model. - using ModelInfoMap = - std::unordered_map>; - - // Set of DependencyNode - using NodeSet = std::set; - - ModelRepositoryManager( - const std::set& repository_paths, const bool autofill, - const bool polling_enabled, const bool model_control_enabled, - const double min_compute_capability, - std::unique_ptr life_cycle); - - /// The internal function that are called in Create() and PollAndUpdate(). - Status PollAndUpdateInternal(bool* all_models_polled); - - /// The internal function that load or unload a set of models. - Status LoadUnloadModels( - const std::unordered_map< - std::string, std::vector>& models, - const ActionType type, const bool unload_dependents, - bool* all_models_polled); - - /// Poll the requested models in the model repository and - /// compare with the current set. Return the additions, deletions, - /// and modifications that have occurred. This function will not updated - /// the current model info, it is caller's responsibility to do so. - /// \param models The map from models to be polled to their associated - /// parameters. - /// \param added The names of the models added to the repository. - /// \param deleted The names of the models removed from the repository. - /// \param modified The names of the models remaining in the - /// repository that have been changed. - /// \param unmodified The names of the models remaining in the - /// repository that have not changed. - /// \param updated_infos The model infos retrieved from the poll. - /// \param all_models_polled Return true if all models are polled and - /// their model configuration are validated successfully. Instead of aborting - /// the polling, the models that fail will be ignored and their model infos - /// will stay in the previous state. - /// \return The error status. - Status Poll( - const std::unordered_map< - std::string, std::vector>& models, - std::set* added, std::set* deleted, - std::set* modified, std::set* unmodified, - ModelInfoMap* updated_infos, bool* all_models_polled); - - /// Helper function for Poll() to initialize ModelInfo for the model. - /// \param name The name of the model. - /// \param path The model path. Empty path means the model is provided via - /// 'params' - /// \param params The model parameters provided for polling model. - /// \param info Return the updated ModelInfo. 'nullptr' will be returned if - /// existing ModelInfo for the model should be reused. - /// \return The error status. 
- Status InitializeModelInfo( - const std::string& name, const std::string& path, - const std::vector& params, - std::unique_ptr* info); - - /// Load models based on the dependency graph. The function will iteratively - /// load models that all the models they depend on has been loaded, and unload - /// models if their dependencies are no longer satisfied. - /// \return The status of the model loads. - std::map LoadModelByDependency(); - - /// Helper function to update the dependency graph based on the poll result - /// \param added The names of the models added to the repository. - /// \param deleted The names of the models removed from the repository. - /// \param modified The names of the models remaining in the - /// repository that have been changed. - /// \param deleted_dependents The names of dependent models to be removed - /// from the repository. - /// \return The error status. - Status UpdateDependencyGraph( - const std::set& added, const std::set& deleted, - const std::set& modified, - std::set* deleted_dependents = nullptr); - - /// Helper function to uncheck the nodes because the model that they depends - /// on has changed. The unchecked nodes will be validated again. - /// The function will be call recursively to uncheck all downstreams. - /// \param downstreams The nodes to be unchecked. - /// \param updated_nodes Return the nodes that have been unchecked - void UncheckDownstream(NodeSet* downstreams, NodeSet* updated_nodes); - - /// Helper function to construct the edges between nodes in dependency graph. - /// \param updated_node The node that is newly added or modified. - /// \return True if the node represents an ensemble model. False otherwise. - bool ConnectDependencyGraph(DependencyNode* updated_node); - - /// Get the model info for a named model. - /// \param name The model name. - /// \param model_info Returns the model information. - /// \return OK if found, NOT_FOUND otherwise. - Status GetModelInfo(const std::string& name, ModelInfo** model_info); - - /// Get the models to be loaded / unloaded based on the model loaded in - /// previous iteration. - /// \param loaded_models The models loaded / unloaded in previous iteration. - /// Unloaded models will be represented as models with no loaded versions. - /// \return A pair of node set containing models to be loaded and models to be - /// unloaded for the next iteration. - std::pair ModelsToLoadUnload(const NodeSet& loaded_models); - - /// Check if the node is ready for the next iteration. A node is ready if the - /// node is invalid (containing invalid model config or its depdencies failed - /// to load) or all of its dependencies are satisfied. - /// \param node The node to be checked. - /// \return True if the node is ready. False otherwise. 
- bool CheckNode(DependencyNode* node); - - Status CircularcyCheck( - DependencyNode* current_node, const DependencyNode* start_node); - - bool ModelDirectoryOverride( - const std::vector& model_params); - - std::set repository_paths_; - const bool autofill_; - const bool polling_enabled_; - const bool model_control_enabled_; - const double min_compute_capability_; - - std::mutex poll_mu_; - ModelInfoMap infos_; - - std::unordered_map> - dependency_graph_; - std::unordered_map> - missing_nodes_; - - // Mappings from (overridden) model names to a pair of their repository and - // absolute path - std::unordered_map> - model_mappings_; - - std::unique_ptr model_life_cycle_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/numa_utils.cc b/3rdparty/core-r22.12/src/numa_utils.cc deleted file mode 100644 index 03f7af278990dc17c8a6746ad693ebfe46bf2b48..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/numa_utils.cc +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
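The Poll() contract documented in the repository-manager header above boils down to a set comparison between the previously seen model infos and the infos discovered in the current poll. The sketch below is a minimal, self-contained illustration of that added/deleted/modified/unmodified classification; the `ModelMeta` type, its `mtime_ns` field, and the function name are hypothetical stand-ins, not the Triton types.

```cpp
// Illustrative only: classify models into added/deleted/modified/unmodified
// by comparing the previously polled state with a freshly polled one.
// ModelMeta and its mtime_ns field are hypothetical stand-ins.
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>

struct ModelMeta {
  int64_t mtime_ns;  // last-modified timestamp of the model directory
};

void ClassifyPoll(
    const std::map<std::string, ModelMeta>& previous,
    const std::map<std::string, ModelMeta>& current,
    std::set<std::string>* added, std::set<std::string>* deleted,
    std::set<std::string>* modified, std::set<std::string>* unmodified)
{
  for (const auto& [name, meta] : current) {
    auto it = previous.find(name);
    if (it == previous.end()) {
      added->insert(name);
    } else if (it->second.mtime_ns != meta.mtime_ns) {
      modified->insert(name);
    } else {
      unmodified->insert(name);
    }
  }
  for (const auto& kv : previous) {
    if (current.find(kv.first) == current.end()) {
      deleted->insert(kv.first);
    }
  }
}

int main()
{
  std::map<std::string, ModelMeta> prev{{"resnet", {1}}, {"bert", {5}}};
  std::map<std::string, ModelMeta> curr{{"bert", {7}}, {"gpt", {2}}};
  std::set<std::string> added, deleted, modified, unmodified;
  ClassifyPoll(prev, curr, &added, &deleted, &modified, &unmodified);
  std::cout << "added=" << added.size() << " deleted=" << deleted.size()
            << " modified=" << modified.size() << "\n";  // 1 1 1
}
```

In the header above, these sets then feed UpdateDependencyGraph() and the subsequent dependency-ordered loads and unloads.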
-#include "numa_utils.h" - -#ifndef _WIN32 -#include -#include -#endif -#include "triton/common/logging.h" - -namespace triton { namespace core { - -namespace { -std::string -VectorToString(const std::vector& vec) -{ - std::string str("["); - for (const auto& element : vec) { - str += std::to_string(element); - str += ","; - } - - str += "]"; - return str; -} - -Status -ParseIntOption(const std::string& msg, const std::string& arg, int* value) -{ - try { - *value = std::stoi(arg); - } - catch (const std::invalid_argument& ia) { - return Status( - Status::Code::INVALID_ARG, - msg + ": Can't parse '" + arg + "' to integer"); - } - return Status::Success; -} - -} // namespace - -// NUMA setting will be ignored on Windows platform -#ifdef _WIN32 -Status -SetNumaConfigOnThread( - const triton::common::HostPolicyCmdlineConfig& host_policy) -{ - return Status::Success; -} - -Status -SetNumaMemoryPolicy(const triton::common::HostPolicyCmdlineConfig& host_policy) -{ - return Status::Success; -} - -Status -GetNumaMemoryPolicyNodeMask(unsigned long* node_mask) -{ - *node_mask = 0; - return Status::Success; -} - -Status -ResetNumaMemoryPolicy() -{ - return Status::Success; -} - -Status -SetNumaThreadAffinity( - std::thread::native_handle_type thread, - const triton::common::HostPolicyCmdlineConfig& host_policy) -{ - return Status::Success; -} -#else -// Use variable to make sure no NUMA related function is actually called -// if Triton is not running with NUMA awareness. i.e. Extra docker permission -// is needed to call the NUMA functions and this ensures backward compatibility. -thread_local bool numa_set = false; - -Status -SetNumaConfigOnThread( - const triton::common::HostPolicyCmdlineConfig& host_policy) -{ - // Set thread affinity - RETURN_IF_ERROR(SetNumaThreadAffinity(pthread_self(), host_policy)); - - // Set memory policy - RETURN_IF_ERROR(SetNumaMemoryPolicy(host_policy)); - - return Status::Success; -} - -Status -SetNumaMemoryPolicy(const triton::common::HostPolicyCmdlineConfig& host_policy) -{ - const auto it = host_policy.find("numa-node"); - if (it != host_policy.end()) { - int node_id; - RETURN_IF_ERROR( - ParseIntOption("Parsing 'numa-node' value", it->second, &node_id)); - LOG_VERBOSE(1) << "Thread is binding to NUMA node " << it->second - << ". 
Max NUMA node count: " << (numa_max_node() + 1); - numa_set = true; - unsigned long node_mask = 1UL << node_id; - if (set_mempolicy(MPOL_BIND, &node_mask, (numa_max_node() + 1) + 1) != 0) { - return Status( - Status::Code::INTERNAL, - std::string("Unable to set NUMA memory policy: ") + strerror(errno)); - } - } - return Status::Success; -} - -Status -GetNumaMemoryPolicyNodeMask(unsigned long* node_mask) -{ - *node_mask = 0; - int mode; - if (numa_set && - get_mempolicy(&mode, node_mask, numa_max_node() + 1, NULL, 0) != 0) { - return Status( - Status::Code::INTERNAL, - std::string("Unable to get NUMA node for current thread: ") + - strerror(errno)); - } - return Status::Success; -} - -Status -ResetNumaMemoryPolicy() -{ - if (numa_set && (set_mempolicy(MPOL_DEFAULT, nullptr, 0) != 0)) { - return Status( - Status::Code::INTERNAL, - std::string("Unable to reset NUMA memory policy: ") + strerror(errno)); - } - numa_set = false; - return Status::Success; -} - -Status -SetNumaThreadAffinity( - std::thread::native_handle_type thread, - const triton::common::HostPolicyCmdlineConfig& host_policy) -{ - const auto it = host_policy.find("cpu-cores"); - if (it != host_policy.end()) { - // Parse CPUs - std::vector cpus; - { - const auto& cpu_str = it->second; - auto delim_cpus = cpu_str.find(","); - int current_pos = 0; - while (true) { - auto delim_range = cpu_str.find("-", current_pos); - if (delim_range == std::string::npos) { - return Status( - Status::Code::INVALID_ARG, - std::string("host policy setting 'cpu-cores' format is " - "'-'. Got ") + - cpu_str.substr( - current_pos, ((delim_cpus == std::string::npos) - ? (cpu_str.length() + 1) - : delim_cpus) - - current_pos)); - } - int lower, upper; - RETURN_IF_ERROR(ParseIntOption( - "Parsing 'cpu-cores' value", - cpu_str.substr(current_pos, delim_range - current_pos), &lower)); - RETURN_IF_ERROR(ParseIntOption( - "Parsing 'cpu-cores' value", - (delim_cpus == std::string::npos) - ? cpu_str.substr(delim_range + 1) - : cpu_str.substr( - delim_range + 1, delim_cpus - (delim_range + 1)), - &upper)); - for (; lower <= upper; ++lower) { - cpus.push_back(lower); - } - // break if the processed range is the last specified range - if (delim_cpus != std::string::npos) { - current_pos = delim_cpus + 1; - delim_cpus = cpu_str.find(",", current_pos); - } else { - break; - } - } - } - - LOG_VERBOSE(1) << "Thread is binding to one of the CPUs: " - << VectorToString(cpus); - numa_set = true; - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - for (int cpu : cpus) { - CPU_SET(cpu, &cpuset); - } - if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset) != 0) { - return Status( - Status::Code::INTERNAL, - std::string("Unable to set NUMA thread affinity: ") + - strerror(errno)); - } - } - return Status::Success; -} -#endif - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/numa_utils.h b/3rdparty/core-r22.12/src/numa_utils.h deleted file mode 100644 index bb226bdfc23b31f2623c838e5f4cafbd4d18c914..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/numa_utils.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include "status.h" -#include "triton/common/model_config.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -// Helper function to set memory policy and thread affinity on current thread -Status SetNumaConfigOnThread( - const triton::common::HostPolicyCmdlineConfig& host_policy); - -// Restrict the memory allocation to specific NUMA node. -Status SetNumaMemoryPolicy( - const triton::common::HostPolicyCmdlineConfig& host_policy); - -// Retrieve the node mask used to set memory policy for the current thread -Status GetNumaMemoryPolicyNodeMask(unsigned long* node_mask); - -// Reset the memory allocation setting. -Status ResetNumaMemoryPolicy(); - -// Set a thread affinity to be on specific cpus. -Status SetNumaThreadAffinity( - std::thread::native_handle_type thread, - const triton::common::HostPolicyCmdlineConfig& host_policy); - - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/payload.cc b/3rdparty/core-r22.12/src/payload.cc deleted file mode 100644 index c5c2fa26b408eca5bf4ac3b75003171b792f47e7..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/payload.cc +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "payload.h" - -namespace triton { namespace core { - -Payload::Payload() - : op_type_(Operation::INFER_RUN), - requests_(std::vector>()), - OnCallback_([]() {}), instance_(nullptr), state_(State::UNINITIALIZED), - batcher_start_ns_(0), saturated_(false) -{ - exec_mu_.reset(new std::mutex()); -} - -const Status& -Payload::MergePayload(std::shared_ptr& payload) -{ - if ((payload->GetOpType() != Operation::INFER_RUN) || - (op_type_ != Operation::INFER_RUN)) { - static Status op_type_error( - Status::Code::INTERNAL, - "Attempted to merge payloads of type that are not INFER_RUN"); - return op_type_error; - } - if (payload->GetInstance() != instance_) { - static Status instance_error( - Status::Code::INTERNAL, - "Attempted to merge payloads of mismatching instance"); - return instance_error; - } - if ((payload->GetState() != State::EXECUTING) || - (state_ != State::EXECUTING)) { - static Status state_error( - Status::Code::INTERNAL, - "Attempted to merge payloads that are not in executing state"); - return state_error; - } - - // Skip comparison if not initialized (required), here assume either all - // payloads are initialized or otherwise. - if (required_equal_inputs_.Initialized() && - !required_equal_inputs_.HasEqualInputs(*payload->Requests().begin())) { - static Status shape_error( - Status::Code::INVALID_ARG, - "Attempted to merge payloads that has non-equal inputs"); - return shape_error; - } - - requests_.insert( - requests_.end(), std::make_move_iterator(payload->Requests().begin()), - std::make_move_iterator(payload->Requests().end())); - - payload->Callback(); - - return Status::Success; -} - -void -Payload::Reset(const Operation op_type, TritonModelInstance* instance) -{ - op_type_ = op_type; - requests_.clear(); - OnCallback_ = []() {}; - release_callbacks_.clear(); - instance_ = instance; - state_ = State::UNINITIALIZED; - status_.reset(new std::promise()); - required_equal_inputs_ = RequiredEqualInputs(); - batcher_start_ns_ = 0; - saturated_ = false; -} - -void -Payload::Release() -{ - op_type_ = Operation::INFER_RUN; - requests_.clear(); - OnCallback_ = []() {}; - release_callbacks_.clear(); - instance_ = nullptr; - state_ = State::RELEASED; - required_equal_inputs_ = RequiredEqualInputs(); - batcher_start_ns_ = 0; - saturated_ = false; -} - -size_t -Payload::BatchSize() -{ - size_t batch_size = 0; - for (const auto& request : requests_) { - batch_size += std::max(1U, request->BatchSize()); - } - return batch_size; -} - -void -Payload::ReserveRequests(size_t size) -{ - requests_.reserve(size); -} - -void -Payload::AddRequest(std::unique_ptr request) -{ - if ((batcher_start_ns_ == 0) || - (batcher_start_ns_ > request->BatcherStartNs())) { - batcher_start_ns_ = request->BatcherStartNs(); - } - requests_.push_back(std::move(request)); -} - -void -Payload::SetCallback(std::function OnCallback) -{ - OnCallback_ = OnCallback; -} - -void -Payload::SetInstance(TritonModelInstance* model_instance) -{ - instance_ = model_instance; -} - -void 
-Payload::AddInternalReleaseCallback(std::function&& callback) -{ - release_callbacks_.emplace_back(std::move(callback)); -} - -void -Payload::MarkSaturated() -{ - saturated_ = true; -} - -void -Payload::SetState(Payload::State state) -{ - state_ = state; -} - -Status -Payload::Wait() -{ - return status_->get_future().get(); -} - -void -Payload::Callback() -{ - OnCallback_(); -} - -void -Payload::OnRelease() -{ - // Invoke the release callbacks added internally before releasing the - // request to user provided callback. - for (auto it = release_callbacks_.rbegin(); it != release_callbacks_.rend(); - it++) { - (*it)(); - } - release_callbacks_.clear(); -} - -void -Payload::Execute(bool* should_exit) -{ - *should_exit = false; - - Status status; - switch (op_type_) { - case Operation::INFER_RUN: - instance_->Schedule(std::move(requests_), OnCallback_); - break; - case Operation::INIT: - status = instance_->Initialize(); - break; - case Operation::WARM_UP: - status = instance_->WarmUp(); - break; - case Operation::EXIT: - *should_exit = true; - } - - status_->set_value(status); -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/payload.h b/3rdparty/core-r22.12/src/payload.h deleted file mode 100644 index 1650917ae20053b457dad06bd15c5dd2a965b6d5..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/payload.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
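Two small details of the Payload implementation above are easy to miss: AddRequest() keeps the earliest batcher-enqueue timestamp across all requests it accumulates, and BatchSize() counts a non-batching request (batch size 0) as occupying one slot. The following stand-alone sketch uses a simplified `Request` stand-in rather than Triton's inference request type to show both rules.

```cpp
// Illustrative stand-ins for the accounting done by Payload::AddRequest()
// and Payload::BatchSize(); Request is a hypothetical simplification.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

struct Request {
  uint32_t batch_size;        // 0 for models that do not support batching
  uint64_t batcher_start_ns;  // when the request entered the batcher
};

struct PayloadSketch {
  std::vector<std::unique_ptr<Request>> requests;
  uint64_t batcher_start_ns = 0;

  void AddRequest(std::unique_ptr<Request> r)
  {
    // Track the earliest enqueue time so queue delay is measured from the
    // oldest request in the merged payload.
    if ((batcher_start_ns == 0) || (batcher_start_ns > r->batcher_start_ns)) {
      batcher_start_ns = r->batcher_start_ns;
    }
    requests.push_back(std::move(r));
  }

  size_t BatchSize() const
  {
    size_t total = 0;
    for (const auto& r : requests) {
      // A batch size of 0 still occupies one slot in the batch.
      total += std::max<uint32_t>(1u, r->batch_size);
    }
    return total;
  }
};

int main()
{
  PayloadSketch p;
  p.AddRequest(std::make_unique<Request>(Request{0, 200}));
  p.AddRequest(std::make_unique<Request>(Request{4, 100}));
  std::cout << p.BatchSize() << " " << p.batcher_start_ns << "\n";  // 5 100
}
```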
-#pragma once - -#include -#include -#include -#include -#include -#include - -#include "backend_model_instance.h" -#include "infer_request.h" -#include "scheduler_utils.h" -#include "status.h" - -namespace triton { namespace core { - -class Payload { - public: - enum Operation { INFER_RUN = 0, INIT = 1, WARM_UP = 2, EXIT = 3 }; - enum State { - UNINITIALIZED = 0, - READY = 1, - REQUESTED = 2, - SCHEDULED = 3, - EXECUTING = 4, - RELEASED = 5 - }; - - Payload(); - void Reset(const Operation op_type, TritonModelInstance* instance = nullptr); - const Status& MergePayload(std::shared_ptr& payload); - Operation GetOpType() { return op_type_; } - std::mutex* GetExecMutex() { return exec_mu_.get(); } - size_t RequestCount() { return requests_.size(); } - size_t BatchSize(); - void ReserveRequests(size_t size); - void AddRequest(std::unique_ptr request); - std::vector>& Requests() - { - return requests_; - } - uint64_t BatcherStartNs() { return batcher_start_ns_; } - void SetCallback(std::function OnCallback); - void Callback(); - void AddInternalReleaseCallback(std::function&& callback); - void OnRelease(); - void SetInstance(TritonModelInstance* model_instance); - TritonModelInstance* GetInstance() { return instance_; } - void MarkSaturated(); - bool IsSaturated() { return saturated_; } - RequiredEqualInputs* MutableRequiredEqualInputs() - { - return &required_equal_inputs_; - } - - State GetState() { return state_; } - void SetState(State state); - void Execute(bool* should_exit); - Status Wait(); - void Release(); - - private: - Operation op_type_; - std::vector> requests_; - std::function OnCallback_; - std::vector> release_callbacks_; - TritonModelInstance* instance_; - State state_; - std::unique_ptr> status_; - std::unique_ptr exec_mu_; - uint64_t batcher_start_ns_; - RequiredEqualInputs required_equal_inputs_; - - bool saturated_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/pinned_memory_manager.cc b/3rdparty/core-r22.12/src/pinned_memory_manager.cc deleted file mode 100644 index 4b4ffd42207ccf22a6f8da30f7785832f378a26b..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/pinned_memory_manager.cc +++ /dev/null @@ -1,378 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// - -#include "pinned_memory_manager.h" - -#include -#include "numa_utils.h" -#include "triton/common/logging.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace core { - -namespace { - -std::string -PointerToString(void* ptr) -{ - std::stringstream ss; - ss << ptr; - return ss.str(); -} - -Status -ParseIntOption(const std::string& msg, const std::string& arg, int* value) -{ - try { - *value = std::stoi(arg); - } - catch (const std::invalid_argument& ia) { - return Status( - Status::Code::INVALID_ARG, - msg + ": Can't parse '" + arg + "' to integer"); - } - return Status::Success; -} - -} // namespace - -std::unique_ptr PinnedMemoryManager::instance_; -uint64_t PinnedMemoryManager::pinned_memory_byte_size_; - -PinnedMemoryManager::PinnedMemory::PinnedMemory( - void* pinned_memory_buffer, uint64_t size) - : pinned_memory_buffer_(pinned_memory_buffer) -{ - if (pinned_memory_buffer_ != nullptr) { - managed_pinned_memory_ = boost::interprocess::managed_external_buffer( - boost::interprocess::create_only_t{}, pinned_memory_buffer_, size); - } -} - - -PinnedMemoryManager::PinnedMemory::~PinnedMemory() -{ -#ifdef TRITON_ENABLE_GPU - if (pinned_memory_buffer_ != nullptr) { - cudaFreeHost(pinned_memory_buffer_); - } -#endif // TRITON_ENABLE_GPU -} - -PinnedMemoryManager::~PinnedMemoryManager() -{ - // Clean up - for (const auto& memory_info : memory_info_) { - const auto& is_pinned = memory_info.second.first; - if (!is_pinned) { - free(memory_info.first); - } - } -} - -void -PinnedMemoryManager::AddPinnedMemoryBuffer( - const std::shared_ptr& pinned_memory_buffer, - unsigned long node_mask) -{ - pinned_memory_buffers_[node_mask] = pinned_memory_buffer; -} - -Status -PinnedMemoryManager::AllocInternal( - void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type, - bool allow_nonpinned_fallback, PinnedMemory* pinned_memory_buffer) -{ - auto status = Status::Success; - if (pinned_memory_buffer->pinned_memory_buffer_ != nullptr) { - std::lock_guard lk(pinned_memory_buffer->buffer_mtx_); - *ptr = pinned_memory_buffer->managed_pinned_memory_.allocate( - size, std::nothrow_t{}); - *allocated_type = TRITONSERVER_MEMORY_CPU_PINNED; - if (*ptr == nullptr) { - status = Status( - Status::Code::INTERNAL, "failed to allocate pinned system memory"); - } - } else { - status = Status( - Status::Code::INTERNAL, - "failed to allocate pinned system memory: no pinned memory pool"); - } - - bool is_pinned = true; - if ((!status.IsOk()) && allow_nonpinned_fallback) { - static bool warning_logged = false; - if (!warning_logged) { - LOG_WARNING << status.Message() - << ", falling back to non-pinned system memory"; - warning_logged = true; - } - *ptr = malloc(size); - *allocated_type = TRITONSERVER_MEMORY_CPU; - is_pinned = false; - if (*ptr == nullptr) { - status = Status( - Status::Code::INTERNAL, - "failed to allocate non-pinned system memory"); - } else { - status = Status::Success; - } - } - - // keep track of allocated buffer 
or clean up - { - std::lock_guard lk(info_mtx_); - if (status.IsOk()) { - auto res = memory_info_.emplace( - *ptr, std::make_pair(is_pinned, pinned_memory_buffer)); - if (!res.second) { - status = Status( - Status::Code::INTERNAL, "unexpected memory address collision, '" + - PointerToString(*ptr) + - "' has been managed"); - } - LOG_VERBOSE(1) << (is_pinned ? "" : "non-") - << "pinned memory allocation: " - << "size " << size << ", addr " << *ptr; - } - } - - if ((!status.IsOk()) && (*ptr != nullptr)) { - if (is_pinned) { - std::lock_guard lk(pinned_memory_buffer->buffer_mtx_); - pinned_memory_buffer->managed_pinned_memory_.deallocate(*ptr); - } else { - free(*ptr); - } - } - - return status; -} - -Status -PinnedMemoryManager::FreeInternal(void* ptr) -{ - bool is_pinned = true; - PinnedMemory* pinned_memory_buffer = nullptr; - { - std::lock_guard lk(info_mtx_); - auto it = memory_info_.find(ptr); - if (it != memory_info_.end()) { - is_pinned = it->second.first; - pinned_memory_buffer = it->second.second; - LOG_VERBOSE(1) << (is_pinned ? "" : "non-") - << "pinned memory deallocation: " - << "addr " << ptr; - memory_info_.erase(it); - } else { - return Status( - Status::Code::INTERNAL, "unexpected memory address '" + - PointerToString(ptr) + - "' is not being managed"); - } - } - - if (is_pinned) { - std::lock_guard lk(pinned_memory_buffer->buffer_mtx_); - pinned_memory_buffer->managed_pinned_memory_.deallocate(ptr); - } else { - free(ptr); - } - return Status::Success; -} - -void -PinnedMemoryManager::Reset() -{ - instance_.reset(); -} - -Status -PinnedMemoryManager::Create(const Options& options) -{ - if (instance_ != nullptr) { - LOG_WARNING << "New pinned memory pool of size " - << options.pinned_memory_pool_byte_size_ - << " could not be created since one already exists" - << " of size " << pinned_memory_byte_size_; - return Status::Success; - } - - instance_.reset(new PinnedMemoryManager()); - if (options.host_policy_map_.empty()) { - void* buffer = nullptr; -#ifdef TRITON_ENABLE_GPU - auto err = cudaHostAlloc( - &buffer, options.pinned_memory_pool_byte_size_, cudaHostAllocPortable); - if (err != cudaSuccess) { - buffer = nullptr; - LOG_WARNING << "Unable to allocate pinned system memory, pinned memory " - "pool will not be available: " - << std::string(cudaGetErrorString(err)); - } else if (options.pinned_memory_pool_byte_size_ != 0) { - LOG_INFO << "Pinned memory pool is created at '" - << PointerToString(buffer) << "' with size " - << options.pinned_memory_pool_byte_size_; - } else { - LOG_INFO << "Pinned memory pool disabled"; - } -#endif // TRITON_ENABLE_GPU - try { - instance_->AddPinnedMemoryBuffer( - std::shared_ptr( - new PinnedMemory(buffer, options.pinned_memory_pool_byte_size_)), - 0); - } - catch (const std::exception& ex) { - return Status( - Status::Code::INTERNAL, - "Failed to add Pinned Memory buffer: " + std::string(ex.what())); - } - } else { - // Create only one buffer / manager should be created for one node, - // and all associated devices should request memory from the shared manager - std::map numa_map; - for (const auto host_policy : options.host_policy_map_) { - const auto numa_it = host_policy.second.find("numa-node"); - if (numa_it != host_policy.second.end()) { - int32_t numa_id; - if (ParseIntOption("Parsing NUMA node", numa_it->second, &numa_id) - .IsOk()) { - numa_map.emplace(numa_id, host_policy.first); - } - } - } - for (const auto node_policy : numa_map) { - auto status = - SetNumaMemoryPolicy(options.host_policy_map_.at(node_policy.second)); - if 
(!status.IsOk()) { - LOG_WARNING << "Unable to allocate pinned system memory for NUMA node " - << node_policy.first << ": " << status.AsString(); - continue; - } - unsigned long node_mask; - status = GetNumaMemoryPolicyNodeMask(&node_mask); - if (!status.IsOk()) { - LOG_WARNING << "Unable to get NUMA node set for current thread: " - << status.AsString(); - continue; - } - void* buffer = nullptr; -#ifdef TRITON_ENABLE_GPU - auto err = cudaHostAlloc( - &buffer, options.pinned_memory_pool_byte_size_, - cudaHostAllocPortable); - if (err != cudaSuccess) { - buffer = nullptr; - LOG_WARNING << "Unable to allocate pinned system memory, pinned memory " - "pool will not be available: " - << std::string(cudaGetErrorString(err)); - } else if (options.pinned_memory_pool_byte_size_ != 0) { - LOG_INFO << "Pinned memory pool is created at '" - << PointerToString(buffer) << "' with size " - << options.pinned_memory_pool_byte_size_; - } else { - LOG_INFO << "Pinned memory pool disabled"; - } -#endif // TRITON_ENABLE_GPU - ResetNumaMemoryPolicy(); - try { - instance_->AddPinnedMemoryBuffer( - std::shared_ptr(new PinnedMemory( - buffer, options.pinned_memory_pool_byte_size_)), - node_mask); - } - catch (const std::exception& ex) { - return Status( - Status::Code::INTERNAL, - "Failed to add Pinned Memory buffer with host policy: " + - std::string(ex.what())); - } - } - // If no pinned memory is allocated, add an empty entry where all allocation - // will be on normal system memory - if (instance_->pinned_memory_buffers_.empty()) { - try { - instance_->AddPinnedMemoryBuffer( - std::shared_ptr(new PinnedMemory( - nullptr, options.pinned_memory_pool_byte_size_)), - 0); - } - catch (const std::exception& ex) { - return Status( - Status::Code::INTERNAL, - "Failed to add empty Pinned Memory entry: " + - std::string(ex.what())); - } - } - } - pinned_memory_byte_size_ = options.pinned_memory_pool_byte_size_; - return Status::Success; -} - -Status -PinnedMemoryManager::Alloc( - void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type, - bool allow_nonpinned_fallback) -{ - if (instance_ == nullptr) { - return Status( - Status::Code::UNAVAILABLE, "PinnedMemoryManager has not been created"); - } - - auto pinned_memory_buffer = - instance_->pinned_memory_buffers_.begin()->second.get(); - if (instance_->pinned_memory_buffers_.size() > 1) { - unsigned long node_mask; - if (GetNumaMemoryPolicyNodeMask(&node_mask).IsOk()) { - auto it = instance_->pinned_memory_buffers_.find(node_mask); - if (it != instance_->pinned_memory_buffers_.end()) { - pinned_memory_buffer = it->second.get(); - } - } - } - - return instance_->AllocInternal( - ptr, size, allocated_type, allow_nonpinned_fallback, - pinned_memory_buffer); -} - -Status -PinnedMemoryManager::Free(void* ptr) -{ - if (instance_ == nullptr) { - return Status( - Status::Code::UNAVAILABLE, "PinnedMemoryManager has not been created"); - } - - return instance_->FreeInternal(ptr); -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/pinned_memory_manager.h b/3rdparty/core-r22.12/src/pinned_memory_manager.h deleted file mode 100644 index 1236f06b4f73d41c4bdba9e0a9ff10118a106344..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/pinned_memory_manager.h +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -#pragma once - -#include -#include -#include -#include -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -// This is a singleton class responsible for maintaining pinned memory pool -// used by the inference server. Pinned memory allocations and deallocations -// must be requested via functions provided by this class. -class PinnedMemoryManager { - public: - // Options to configure pinned memeory manager. - struct Options { - Options( - uint64_t b = 0, - const triton::common::HostPolicyCmdlineConfigMap& host_policy_map = {}) - : pinned_memory_pool_byte_size_(b), host_policy_map_(host_policy_map) - { - } - - uint64_t pinned_memory_pool_byte_size_; - triton::common::HostPolicyCmdlineConfigMap host_policy_map_; - }; - - ~PinnedMemoryManager(); - - // Create the pinned memory manager based on 'options' specified. - // Return Status object indicating success or failure. - static Status Create(const Options& options); - - // Allocate pinned memory with the requested 'size' and return the pointer - // in 'ptr'. If 'allow_nonpinned_fallback' is true, regular system memory - // will be allocated as fallback in the case where pinned memory fails to - // be allocated. - // Return Status object indicating success or failure. - static Status Alloc( - void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type, - bool allow_nonpinned_fallback); - - // Free the memory allocated by the pinned memory manager. - // Return Status object indicating success or failure. - static Status Free(void* ptr); - - protected: - // Provide explicit control on the lifecycle of the CUDA memory manager, - // for testing only. 
- static void Reset(); - - private: - class PinnedMemory { - public: - PinnedMemory(void* pinned_memory_buffer, uint64_t size); - ~PinnedMemory(); - void* pinned_memory_buffer_; - std::mutex buffer_mtx_; - boost::interprocess::managed_external_buffer managed_pinned_memory_; - }; - - PinnedMemoryManager() = default; - - Status AllocInternal( - void** ptr, uint64_t size, TRITONSERVER_MemoryType* allocated_type, - bool allow_nonpinned_fallback, PinnedMemory* pinned_memory_buffer); - Status FreeInternal(void* ptr); - void AddPinnedMemoryBuffer( - const std::shared_ptr& pinned_memory_buffer, - unsigned long node_mask); - - static std::unique_ptr instance_; - static uint64_t pinned_memory_byte_size_; - - std::mutex info_mtx_; - std::map> memory_info_; - std::map> pinned_memory_buffers_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/rate_limiter.cc b/3rdparty/core-r22.12/src/rate_limiter.cc deleted file mode 100644 index 8052281332f061d1e06476b31e83dac17965019a..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/rate_limiter.cc +++ /dev/null @@ -1,943 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
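The pinned-memory pool above is a single large cudaHostAlloc'd region that is carved up on demand with Boost.Interprocess's managed_external_buffer, falling back to ordinary malloc when the pool cannot satisfy a request. The following is a minimal sketch of that sub-allocation pattern, not the manager itself: it uses a plain malloc'd backing buffer in place of cudaHostAlloc so it builds without CUDA, and it assumes the Boost.Interprocess headers are available.

```cpp
// Minimal sketch of pool sub-allocation with a non-pinned fallback.
// The backing buffer here is ordinary malloc'd memory standing in for a
// cudaHostAlloc'd pinned region.
#include <boost/interprocess/managed_external_buffer.hpp>
#include <cstdlib>
#include <iostream>
#include <new>

namespace bip = boost::interprocess;

int main()
{
  const std::size_t pool_bytes = 1 << 20;
  void* backing = std::malloc(pool_bytes);  // stand-in for cudaHostAlloc
  bip::managed_external_buffer pool(bip::create_only, backing, pool_bytes);

  // Sub-allocate from the pool; nullptr means the pool is exhausted.
  void* p = pool.allocate(64 * 1024, std::nothrow);
  bool from_pool = (p != nullptr);
  if (!from_pool) {
    p = std::malloc(64 * 1024);  // non-pinned fallback, as in AllocInternal()
  }
  std::cout << (from_pool ? "pooled" : "fallback") << " allocation at " << p << "\n";

  // Return the block to whichever allocator produced it.
  if (from_pool) {
    pool.deallocate(p);
  } else {
    std::free(p);
  }
  std::free(backing);
}
```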
- -#include "rate_limiter.h" - -#include -#include "triton/common/logging.h" - -namespace triton { namespace core { - -constexpr size_t MAX_PAYLOAD_BUCKET_COUNT = 1000; - -//========================================================================= -// Core Implementation -//========================================================================= - -Status -RateLimiter::Create( - const bool ignore_resources_and_priority, - const RateLimiter::ResourceMap& resource_map, - std::unique_ptr* rate_limiter) -{ - std::unique_ptr local_rate_limiter( - new RateLimiter(ignore_resources_and_priority, resource_map)); - *rate_limiter = std::move(local_rate_limiter); - - return Status::Success; -} - -Status -RateLimiter::RegisterModelInstance( - TritonModelInstance* triton_model_instance, - const RateLimiterConfig& rate_limiter_config) -{ - { - std::lock_guard lk1(model_ctx_mtx_); - std::lock_guard lk2(model_instance_ctx_mtx_); - - auto& model_context = model_contexts_[triton_model_instance->Model()]; - auto& model_instances = - model_instance_ctxs_[triton_model_instance->Model()]; - - model_instances.push_back( - std::shared_ptr(new ModelInstanceContext( - triton_model_instance, &model_context, rate_limiter_config, - [this](ModelInstanceContext* instance) { OnStage(instance); }, - [this](ModelInstanceContext* instance) { OnRelease(instance); }))); - model_context.AddAvailableInstance(model_instances.back().get()); - model_context.AddSpecificRequestQueue(); - - if (!ignore_resources_and_priority_) { - resource_manager_->AddModelInstance(model_instances.back().get()); - RETURN_IF_ERROR(resource_manager_->UpdateResourceLimits()); - } - } - - InitializePayloadQueues(triton_model_instance); - - return Status::Success; -} - -Status -RateLimiter::UnregisterModel(const TritonModel* model) -{ - { - std::lock_guard lk1(model_ctx_mtx_); - std::lock_guard lk2(model_instance_ctx_mtx_); - - auto& model_context = model_contexts_[model]; - - model_context.RequestRemoval(); - for (const auto& instance : model_instance_ctxs_[model]) { - instance->WaitForRemoval(); - if (!ignore_resources_and_priority_) { - resource_manager_->RemoveModelInstance(instance.get()); - } - } - - model_instance_ctxs_.erase(model); - model_contexts_.erase(model); - } - - if (!ignore_resources_and_priority_) { - RETURN_IF_ERROR(resource_manager_->UpdateResourceLimits()); - } - - { - std::lock_guard lk(payload_queues_mu_); - if (payload_queues_.find(model) != payload_queues_.end()) { - payload_queues_.erase(model); - } - } - - return Status::Success; -} - -bool -RateLimiter::PayloadSlotAvailable(const TritonModel* model) -{ - bool result; - PayloadQueue* payload_queue = payload_queues_[model].get(); - { - std::lock_guard lk(payload_queue->mu_); - result = payload_queue->queue_->Size() < - 2 * payload_queue->specific_queues_.size(); - } - return result; -} - -Status -RateLimiter::EnqueuePayload( - const TritonModel* model, std::shared_ptr payload) -{ - auto pinstance = payload->GetInstance(); - if (payload_queues_.find(model) == payload_queues_.end()) { - LOG_INFO << "Should not print this "; - } - PayloadQueue* payload_queue = payload_queues_[model].get(); - { - std::lock_guard lk(payload_queue->mu_); - payload->SetState(Payload::State::REQUESTED); - if (ignore_resources_and_priority_) { - SchedulePayload(pinstance, payload_queue, payload); - } - } - if (ignore_resources_and_priority_) { - if (pinstance == nullptr) { - payload_queue->cv_.notify_one(); - } else { - payload_queue->cv_.notify_all(); - } - } else { - StandardScheduleFunc sched_func = 
[this, payload_queue, - payload](ModelInstanceContext* mi) { - { - std::lock_guard lk(payload_queue->mu_); - this->SchedulePayload(mi->RawInstance(), payload_queue, payload); - } - auto cb = [mi]() { mi->Release(); }; - payload->AddInternalReleaseCallback(cb); - if (mi->RawInstance() == nullptr) { - payload_queue->cv_.notify_one(); - } else { - payload_queue->cv_.notify_all(); - } - }; - DeferPayloadSchedule(sched_func, model, payload->GetInstance()); - } - return Status::Success; -} - -void -RateLimiter::DequeuePayload( - std::deque& instances, - std::shared_ptr* payload) -{ - payload->reset(); - if (payload_queues_.find(instances[0]->Model()) == payload_queues_.end()) { - LOG_INFO << "Should not print this "; - } - PayloadQueue* payload_queue = payload_queues_[instances[0]->Model()].get(); - std::vector> merged_payloads; - size_t instance_index = std::numeric_limits::max(); - { - std::unique_lock lk(payload_queue->mu_); - payload_queue->cv_.wait(lk, [&instances, &instance_index, payload_queue]() { - bool empty = payload_queue->queue_->Empty(); - if (empty) { - instance_index = 0; - for (const auto instance : instances) { - empty = payload_queue->specific_queues_[instance]->Empty(); - if (empty) { - instance_index++; - } else { - break; - } - } - } - return !empty; - }); - if (instance_index < instances.size()) { - TritonModelInstance* instance = instances[instance_index]; - if (!payload_queue->specific_queues_[instance]->Empty()) { - payload_queue->specific_queues_[instance]->Dequeue( - payload, &merged_payloads); - } - } else { - payload_queue->queue_->Dequeue(payload, &merged_payloads); - } - } - for (auto& merge_payload : merged_payloads) { - PayloadRelease(merge_payload); - } - (*payload)->Callback(); - if ((*payload)->GetInstance() == nullptr) { - (*payload)->SetInstance(instances.front()); - instances.pop_front(); - } else { - instances.erase(instances.begin() + instance_index); - } -} - -std::shared_ptr -RateLimiter::GetPayload( - const Payload::Operation op_type, TritonModelInstance* instance) -{ - std::shared_ptr payload; - - if (max_payload_bucket_count_ > 0) { - std::lock_guard lock(payload_mu_); - - if (!payload_bucket_.empty()) { - payload = payload_bucket_.back(); - payload_bucket_.pop_back(); - } - if (payload.get() == nullptr && (!payloads_in_use_.empty())) { - // Just checking the front of the queue instead the entire queue for - // an available payload to save time. - if (payloads_in_use_.front().use_count() == 1) { - payload = payloads_in_use_.front(); - payloads_in_use_.pop_front(); - } - } - } - - if (payload.get() == nullptr) { - payload.reset(new Payload()); - } - - payload->Reset(op_type, instance); - return payload; -} - -void -RateLimiter::PayloadRelease(std::shared_ptr& payload) -{ - payload->OnRelease(); - if (max_payload_bucket_count_ > 0) { - std::lock_guard lock(payload_mu_); - - if (payloads_in_use_.size() + payload_bucket_.size() < - max_payload_bucket_count_) { - // Release iff the payload shared_ptr is uniquely held. 
- if (payload.use_count() == 1) { - payload->Release(); - payload_bucket_.push_back(std::move(payload)); - return; - } else { - payloads_in_use_.push_back(std::move(payload)); - } - } - } -} - -RateLimiter::RateLimiter( - const bool ignore_resources_and_priority, const ResourceMap& resource_map) - : ignore_resources_and_priority_(ignore_resources_and_priority), - max_payload_bucket_count_(MAX_PAYLOAD_BUCKET_COUNT) -{ - ResourceManager::Create(resource_map, &resource_manager_); -} - -void -RateLimiter::InitializePayloadQueues(const TritonModelInstance* instance) -{ - auto& config = instance->Model()->Config(); - uint64_t max_queue_delay_microseconds; - if (config.has_sequence_batching()) { - const auto& batcher_config = config.sequence_batching(); - if (batcher_config.has_oldest()) { - max_queue_delay_microseconds = - batcher_config.oldest().max_queue_delay_microseconds(); - } else { - max_queue_delay_microseconds = 0; - } - } else if (config.has_dynamic_batching()) { - max_queue_delay_microseconds = - config.dynamic_batching().max_queue_delay_microseconds(); - } else { - max_queue_delay_microseconds = 0; - } - { - std::lock_guard lk(payload_queues_mu_); - if (payload_queues_.find(instance->Model()) == payload_queues_.end()) { - payload_queues_.emplace( - instance->Model(), - new PayloadQueue( - config.max_batch_size(), max_queue_delay_microseconds * 1000)); - } - } - PayloadQueue* payload_queue = payload_queues_[instance->Model()].get(); - if (payload_queue->specific_queues_.find(instance) == - payload_queue->specific_queues_.end()) { - payload_queue->specific_queues_.emplace( - instance, - new InstanceQueue( - config.max_batch_size(), max_queue_delay_microseconds * 1000)); - } -} - -Status -RateLimiter::DeferPayloadSchedule( - const StandardScheduleFunc& OnSchedule, const TritonModel* model, - TritonModelInstance* triton_model_instance) -{ - std::lock_guard lk(model_ctx_mtx_); - - auto itr = model_contexts_.find(model); - if (itr == model_contexts_.end()) { - return Status( - Status::Code::INTERNAL, - "Requested model is not yet registered with rate limiter"); - } - - if (itr->second.isRemovalInProgress()) { - return Status( - Status::Code::INTERNAL, - "New model requests can not be made to a model that is being " - "removed"); - } - - itr->second.EnqueueModelInstanceRequest(OnSchedule, triton_model_instance); - itr->second.StageInstanceIfAvailable(triton_model_instance); - - return Status::Success; -} - -void -RateLimiter::SchedulePayload( - TritonModelInstance* tmi, PayloadQueue* payload_queue, - const std::shared_ptr& payload) -{ - if (tmi == nullptr) { - payload_queue->queue_->Enqueue(payload); - } else { - payload_queue->specific_queues_[tmi]->Enqueue(payload); - } - payload->SetState(Payload::State::SCHEDULED); -} - -void -RateLimiter::OnStage(ModelInstanceContext* instance) -{ - { - std::lock_guard lk(staged_instances_mtx_); - staged_instances_.push(instance); - } - AttemptAllocation(); -} - -void -RateLimiter::OnRelease(ModelInstanceContext* instance) -{ - auto& model_context = model_contexts_[instance->RawInstance()->Model()]; - model_context.AddAvailableInstance(instance); - resource_manager_->ReleaseResources(instance); - if (model_context.ContainsPendingRequests(instance->RawInstance()->Index())) { - model_context.StageInstanceIfAvailable(instance->RawInstance()); - } - AttemptAllocation(); -} - -void -RateLimiter::AttemptAllocation() -{ - std::lock_guard lk(staged_instances_mtx_); - if (!staged_instances_.empty()) { - ModelInstanceContext* instance = staged_instances_.top(); 
- if (resource_manager_->AllocateResources(instance)) { - staged_instances_.pop(); - instance->Allocate(); - } - } -} - -//========================================================================= -// ModelContext Implementation -//========================================================================= - -RateLimiter::ModelContext::ModelContext() : removal_in_progress_(false) {} - -Status -RateLimiter::ModelContext::EnqueueModelInstanceRequest( - const StandardScheduleFunc& OnSchedule, - TritonModelInstance* triton_model_instance) -{ - std::lock_guard lk(sched_request_queue_mtx_); - - if (triton_model_instance == nullptr) { - generic_sched_request_queue_.push(OnSchedule); - } else if ( - (uint32_t)triton_model_instance->Index() < - specific_sched_request_queues_.size()) { - specific_sched_request_queues_[triton_model_instance->Index()].push( - OnSchedule); - } else { - return Status( - Status::Code::INTERNAL, - "expected instance index between 0 and " + - std::to_string(specific_sched_request_queues_.size()) + ", got " + - std::to_string(triton_model_instance->Index())); - } - - return Status::Success; -} - -void -RateLimiter::ModelContext::AddAvailableInstance(ModelInstanceContext* instance) -{ - std::lock_guard lk(avbl_instances_mtx_); - avbl_instances_.push(instance); - instance->MarkAvailable(); -} - - -void -RateLimiter::ModelContext::StageInstanceIfAvailable( - TritonModelInstance* req_instance) -{ - std::lock_guard lk1(sched_request_queue_mtx_); - std::lock_guard lk2(avbl_instances_mtx_); - PriorityQueue backup_queue; - - while (!avbl_instances_.empty()) { - ModelInstanceContext* instance = avbl_instances_.top(); - if ((req_instance != nullptr) && - (instance->RawInstance() != req_instance)) { - backup_queue.push(instance); - avbl_instances_.pop(); - continue; - } - if (!specific_sched_request_queues_[instance->RawInstance()->Index()] - .empty()) { - // Prioritize the specific requests for the available model - // instance highest priority. - const StandardScheduleFunc func = - specific_sched_request_queues_[instance->RawInstance()->Index()] - .front(); - specific_sched_request_queues_[instance->RawInstance()->Index()].pop(); - instance->Stage(func); - } else if (!generic_sched_request_queue_.empty()) { - // If request is for generic model instance then use the - // instance with the highest priority. - const StandardScheduleFunc func = generic_sched_request_queue_.front(); - generic_sched_request_queue_.pop(); - instance->Stage(func); - } else { - // If there are requests for a specific model instance then backup - // the model instance and keep searching through the available - // model instances. The prioritization will be taken care of in the - // staging priority queue. - backup_queue.push(instance); - } - avbl_instances_.pop(); - } - // Restore the backup queue - if (!backup_queue.empty()) { - avbl_instances_.swap(backup_queue); - } -} - -void -RateLimiter::ModelContext::AllocateInstanceIfAvailable() -{ - std::lock_guard lk1(sched_request_queue_mtx_); - std::lock_guard lk2(avbl_instances_mtx_); - PriorityQueue backup_queue; - while (!avbl_instances_.empty()) { - ModelInstanceContext* instance = avbl_instances_.top(); - if (!specific_sched_request_queues_[instance->RawInstance()->Index()] - .empty()) { - // Prioritize the specific requests for the available model - // instance highest priority. 
- const StandardScheduleFunc func = - specific_sched_request_queues_[instance->RawInstance()->Index()] - .front(); - specific_sched_request_queues_[instance->RawInstance()->Index()].pop(); - instance->DirectAllocate(func); - } else if (!generic_sched_request_queue_.empty()) { - // If request is for generic model instance then use the - // instance with the highest priority. - const StandardScheduleFunc func = generic_sched_request_queue_.front(); - generic_sched_request_queue_.pop(); - instance->DirectAllocate(func); - } else { - // If there are requests for a specific model instance then backup - // the model instance and keep searching through the available - // model instances. The prioritization will be taken care of in the - // staging priority queue. - backup_queue.push(instance); - } - avbl_instances_.pop(); - } - // Restore the backup queue - if (!backup_queue.empty()) { - avbl_instances_.swap(backup_queue); - } -} - -void -RateLimiter::ModelContext::AddSpecificRequestQueue() -{ - std::lock_guard lk(sched_request_queue_mtx_); - specific_sched_request_queues_.emplace_back(); -} - -bool -RateLimiter::ModelContext::ContainsPendingRequests(int index) -{ - std::lock_guard lk(sched_request_queue_mtx_); - return (generic_sched_request_queue_.size() != 0) || - (specific_sched_request_queues_[index].size() != 0); -} - -void -RateLimiter::ModelContext::RequestRemoval() -{ - removal_in_progress_ = true; -} - - -//========================================================================= -// ModelInstanceContext Implementation -//========================================================================= - -RateLimiter::ModelInstanceContext::ModelInstanceContext( - TritonModelInstance* triton_model_instance, - RateLimiter::ModelContext* model_context, - const RateLimiter::RateLimiterConfig& rate_limiter_config, - RateLimiter::StandardStageFunc OnStage, - RateLimiter::StandardReleaseFunc OnRelease) - : triton_model_instance_(triton_model_instance), - index_(triton_model_instance->Index()), model_context_(model_context), - rate_limiter_config_(rate_limiter_config), OnStage_(OnStage), - OnRelease_(OnRelease), exec_count_(0), state_(AVAILABLE) -{ -} - -void -RateLimiter::ModelInstanceContext::MarkAvailable() -{ - std::lock_guard lk(state_mtx_); - state_ = AVAILABLE; -} - -Status -RateLimiter::ModelInstanceContext::Stage(StandardScheduleFunc OnSchedule) -{ - { - std::lock_guard lk(state_mtx_); - - if (state_ != AVAILABLE) { - return Status( - Status::Code::INTERNAL, - "Can not stage a model instance that is not yet available"); - } - - state_ = STAGED; - OnSchedule_ = OnSchedule; - } - - OnStage_(this); - - return Status::Success; -} - -Status -RateLimiter::ModelInstanceContext::Allocate() -{ - { - std::lock_guard lk(state_mtx_); - - if (state_ != STAGED) { - return Status( - Status::Code::INTERNAL, - "Can not allocate a model instance that is not yet staged"); - } - - state_ = ALLOCATED; - } - - OnSchedule_(this); - - return Status::Success; -} - -Status -RateLimiter::ModelInstanceContext::DirectAllocate( - StandardScheduleFunc OnSchedule) -{ - { - std::lock_guard lk(state_mtx_); - - if (state_ != AVAILABLE) { - return Status( - Status::Code::INTERNAL, - "Can not allocate a model instance that is not yet available"); - } - - state_ = ALLOCATED; - } - - OnSchedule(this); - - return Status::Success; -} - -void -RateLimiter::ModelInstanceContext::Release() -{ - exec_count_++; - - OnRelease_(this); - - { - std::lock_guard lk(state_mtx_); - if ((model_context_->isRemovalInProgress()) && (state_ == 
AVAILABLE) && - (!model_context_->ContainsPendingRequests(index_))) { - state_ = REMOVED; - } - } - - if (state_ == REMOVED) { - cv_.notify_all(); - } -} - -void -RateLimiter::ModelInstanceContext::RequestRemoval() -{ - std::lock_guard lk(state_mtx_); - - if ((state_ == AVAILABLE) && - (!model_context_->ContainsPendingRequests(index_))) { - state_ = REMOVED; - } -} - -void -RateLimiter::ModelInstanceContext::WaitForRemoval() -{ - if (!model_context_->isRemovalInProgress()) { - model_context_->RequestRemoval(); - } - - RequestRemoval(); - - // Wait for the instance to be removed - { - std::unique_lock lk(state_mtx_); - cv_.wait(lk, [this] { return state_ == REMOVED; }); - } -} - -double -RateLimiter::ModelInstanceContext::ScaledPriority() -{ - // TODO: Different schemes for the prioritization of - // model instance can be added here. - // The priority of instance is 1 by default. If specified - // as 0, the priority is still treated as 1. - auto priority = std::max(rate_limiter_config_.priority(), 1u); - return (exec_count_ * priority); -} - - -//========================================================================= -// ResourceManager Implementation -//========================================================================= - -Status -RateLimiter::ResourceManager::Create( - const ResourceMap& resource_map, - std::unique_ptr* resource_manager) -{ - std::unique_ptr local_resource_manager( - new ResourceManager(resource_map)); - *resource_manager = std::move(local_resource_manager); - return Status::Success; -} - -void -RateLimiter::ResourceManager::AddModelInstance( - const ModelInstanceContext* instance) -{ - std::lock_guard lk(model_resources_mtx_); - auto pr = model_resources_.emplace(std::make_pair(instance, ResourceMap())); - for (const auto& resource : instance->GetRateLimiterConfig()->resources()) { - if (resource.global()) { - (pr.first->second[GLOBAL_RESOURCE_KEY])[resource.name()] = - resource.count(); - } else { - (pr.first->second[instance->RawInstance()->DeviceId()])[resource.name()] = - resource.count(); - } - } -} - -Status -RateLimiter::ResourceManager::RemoveModelInstance( - const ModelInstanceContext* instance) -{ - std::lock_guard lk(model_resources_mtx_); - const auto& itr = model_resources_.find(instance); - if (itr == model_resources_.end()) { - return Status( - Status::Code::INTERNAL, "Can not find the instance to remove"); - } - model_resources_.erase(instance); - return Status::Success; -} - -Status -RateLimiter::ResourceManager::UpdateResourceLimits() -{ - std::lock_guard lk1(max_resources_mtx_); - std::lock_guard lk2(model_resources_mtx_); - max_resources_.clear(); - // Obtain the maximum resource across all the instances - // and use it as the default available. 
- for (const auto& instance_resources : model_resources_) { - for (const auto& resource_device_map : instance_resources.second) { - auto ditr = max_resources_.find(resource_device_map.first); - if (ditr == max_resources_.end()) { - ditr = - max_resources_ - .emplace(resource_device_map.first, resource_device_map.second) - .first; - } else { - for (const auto resource : resource_device_map.second) { - auto ritr = ditr->second.find(resource.first); - if (ritr == ditr->second.end()) { - ritr = ditr->second.emplace(resource.first, resource.second).first; - } else { - if (ritr->second < resource.second) { - ritr->second = resource.second; - } - } - } - } - } - } - if (!explicit_max_resources_.empty()) { - RETURN_IF_ERROR(ParseAndValidateExplicitResources()); - } - RETURN_IF_ERROR(ValidateMaxResources()); - - if (LOG_VERBOSE_IS_ON(1)) { - std::string resource_map_str{"\nMax Resource Map===>\n"}; - for (const auto& ditr : max_resources_) { - if (!ditr.second.empty()) { - std::string device_str{(ditr.first == GLOBAL_RESOURCE_KEY) - ? "GLOBAL" - : std::to_string(ditr.first)}; - resource_map_str += "\tDevice: " + device_str + "\n"; - for (const auto& ritr : ditr.second) { - resource_map_str += "\t\tResource: " + ritr.first + - "\t Count: " + std::to_string(ritr.second) + "\n"; - } - } - } - LOG_VERBOSE(1) << resource_map_str; - } - - return Status::Success; -} - -Status -RateLimiter::ResourceManager::ValidateMaxResources() -{ - for (const auto& global_resource : max_resources_[GLOBAL_RESOURCE_KEY]) { - for (const auto& ditr : max_resources_) { - if (ditr.first != GLOBAL_RESOURCE_KEY) { - for (const auto& ritr : ditr.second) { - if (global_resource.first.compare(ritr.first) == 0) { - return Status( - Status::Code::INVALID_ARG, - (std::string("Resource \"") + ritr.first + - "\" is present as both global and device-specific resource in " - "the model configuration.") - .c_str()); - } - } - } - } - } - return Status::Success; -} - -Status -RateLimiter::ResourceManager::ParseAndValidateExplicitResources() -{ - for (auto& ditr : max_resources_) { - for (auto& ritr : ditr.second) { - // If not specified explicitly, consider the resource to be unavailable. - size_t resource_count = 0; - if (ditr.first == GLOBAL_RESOURCE_KEY) { - // Ignore the device specification... will search for all resources in - // the map... - for (const auto& exp_ditr : explicit_max_resources_) { - for (const auto& exp_ritr : exp_ditr.second) { - if (ritr.first.compare(exp_ritr.first) == 0) { - if (resource_count < exp_ritr.second) { - resource_count = exp_ritr.second; - } - } - } - } - } else { - // Search only for the device specific or per-device resources... 
- // device-specific - for (const auto& exp_ritr : explicit_max_resources_[ditr.first]) { - if (ritr.first.compare(exp_ritr.first) == 0) { - if (resource_count < exp_ritr.second) { - resource_count = exp_ritr.second; - } - } - } - // per-device - for (const auto& exp_ritr : - explicit_max_resources_[PER_DEVICE_RESOURCE_KEY]) { - if (ritr.first.compare(exp_ritr.first) == 0) { - if (resource_count < exp_ritr.second) { - resource_count = exp_ritr.second; - } - } - } - } - if (resource_count < ritr.second) { - return Status( - Status::Code::INVALID_ARG, - (std::string("Resource count for \"") + ritr.first + - "\" is limited to " + std::to_string(resource_count) + - " which will prevent scheduling of one or more model " - "instances, the minimum required count is " + - std::to_string(ritr.second)) - .c_str()); - } else { - ritr.second = resource_count; - } - } - } - - return Status::Success; -} - -bool -RateLimiter::ResourceManager::AllocateResources( - const ModelInstanceContext* instance) -{ - std::lock_guard lk1(model_resources_mtx_); - std::lock_guard lk2(allocated_resources_mtx_); - const auto& itr = model_resources_.find(instance); - if (itr == model_resources_.end()) { - return false; - } else { - // First pass to verify if resources are available - { - std::lock_guard lk3(max_resources_mtx_); - for (const auto& ditr : itr->second) { - auto allocated_ditr = allocated_resources_.find(ditr.first); - if (allocated_ditr == allocated_resources_.end()) { - allocated_ditr = - allocated_resources_ - .emplace(ditr.first, std::map()) - .first; - } - for (const auto& ritr : ditr.second) { - auto allocated_ritr = allocated_ditr->second.find(ritr.first); - if (allocated_ritr == allocated_ditr->second.end()) { - allocated_ritr = - allocated_ditr->second.emplace(ritr.first, 0).first; - } - if ((allocated_ritr->second + ritr.second) > - (max_resources_[ditr.first])[ritr.first]) { - return false; - } - } - } - } - - // Second pass to actually allocate the resources - for (const auto& ditr : itr->second) { - for (const auto& ritr : ditr.second) { - (allocated_resources_[ditr.first])[ritr.first] += ritr.second; - } - } - } - - return true; -} - -Status -RateLimiter::ResourceManager::ReleaseResources( - const ModelInstanceContext* instance) -{ - std::lock_guard lk1(model_resources_mtx_); - std::lock_guard lk2(allocated_resources_mtx_); - const auto& itr = model_resources_.find(instance); - if (itr == model_resources_.end()) { - return Status( - Status::Code::INTERNAL, - "Unable find the instance resources to release"); - } else { - for (const auto& ditr : itr->second) { - for (const auto& ritr : ditr.second) { - (allocated_resources_[ditr.first])[ritr.first] -= ritr.second; - } - } - } - - return Status::Success; -} - -RateLimiter::ResourceManager::ResourceManager(const ResourceMap& resource_map) - : explicit_max_resources_(resource_map) -{ -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/rate_limiter.h b/3rdparty/core-r22.12/src/rate_limiter.h deleted file mode 100644 index 3734e9bd1224e59b846cdd53d1f861a87c0d6f95..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/rate_limiter.h +++ /dev/null @@ -1,310 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include - -#include "backend_model.h" -#include "backend_model_instance.h" -#include "instance_queue.h" -#include "model_config.pb.h" -#include "payload.h" -#include "status.h" - -namespace triton { namespace core { - -// Limits the rate at which requests are dispatched to the model instances -class RateLimiter { - public: - using RateLimiterConfig = inference::ModelRateLimiter; - using ResourceMap = std::map>; - enum RESOURCE_KIND_KEY { - // Key for holding global resources - GLOBAL_RESOURCE_KEY = -2, - // Key for holding resources per each device - PER_DEVICE_RESOURCE_KEY = -1 - }; - - /// Creates a rate limiter object which will funnel the requests to - /// the model instances. A typical lifetime of the model instance within - /// RateLimiter transition from available -> staged -> allocated -> available. - /// The transition from available to staged occurs when a request is - /// registered for the model. Depending upon the resource availabilty and - /// priority, the RateLimiter will transition an instance to allocated state - /// at some point in the future. The staged state is skipped when - /// configured to ignore the resource constraints. The cycle in this case - /// will be available -> allocated -> available. - /// \param ignore_resources_and_priority Whether or not to ignore resource - /// constraints and cross-model priority. An available instance is directly - /// allocated when true. - /// \param resource_map The map to the available resource count provided - /// explicitly. - /// \return Status object indicating success or failure. - static Status Create( - const bool ignore_resources_and_priority, const ResourceMap& resource_map, - std::unique_ptr* rate_limiter); - - /// Registers the model instance with the rate limiter. - /// \param instance The pointer to the TritonModelInstance object to register - /// with the rate limiter. - /// \param rate_limiter_config The rate limiter configuration associated with - /// the model instance. 
- /// \return Status object indicating success or failure. - Status RegisterModelInstance( - TritonModelInstance* instance, - const RateLimiterConfig& rate_limiter_config); - - /// Remove model from the set of models being managed by the rate limiter. - /// \param model The pointer to TritonModel object to be removed. - /// \return Status object indicating success or failure. - Status UnregisterModel(const TritonModel* model); - - /// Returns true if there is a payload slot available for the given model. - /// \param model The pointer to TritonModel object to be removed. - /// \return slot availability in boolean. - bool PayloadSlotAvailable(const TritonModel* model); - - /// Enqueues the payload to rate limiter for scheduling on the given model. - /// \param model The pointer to TritonModel object to be removed. - /// \param payload The shared pointer to the payload object. - /// \return Status object indicating success or failure. - Status EnqueuePayload( - const TritonModel* model, std::shared_ptr payload); - - /// Returns the payload that has been scheduled for the given set of model - /// instances. Note that this call is blocking and depends upon the - /// availability of payloads in the rate limiter for the triton model - /// instance. - /// \param instance The pointers to TritonModelInstance objects whose - /// payload is being requested. - /// \param payload The shared pointer to the payload object. - void DequeuePayload( - std::deque& instance, - std::shared_ptr* payload); - - /// Returns a new payload object. - /// \param op_type The operation type for the payload. - /// \param instance Optional field that providess the model instance that must - /// be used for the execution of the payload. Default is nullptr which allows - /// any model instance to execute the payload. - /// \return The shared pointer to a new payload object. - std::shared_ptr GetPayload( - const Payload::Operation op_type, - TritonModelInstance* instance = nullptr); - - /// Releases the given payload object back to the rate limiter. - /// \param payload The payload to release. - void PayloadRelease(std::shared_ptr& payload); - - private: - class ModelInstanceContext; - class ModelContext; - struct PayloadQueue; - using StandardReleaseFunc = std::function; - using StandardScheduleFunc = std::function; - using StandardStageFunc = std::function; - - // Holds the state of the model instance. 
- class ModelInstanceContext { - public: - friend class RateLimiter; - friend class ResourceManager; - enum State { AVAILABLE, STAGED, ALLOCATED, REMOVED }; - - void Release(); - TritonModelInstance* RawInstance() const { return triton_model_instance_; } - - private: - ModelInstanceContext( - TritonModelInstance* triton_model_instance, ModelContext* model_context, - const RateLimiterConfig& rate_limiter_config, StandardStageFunc OnStage, - StandardReleaseFunc OnRelease); - - const RateLimiterConfig* GetRateLimiterConfig() const - { - return &rate_limiter_config_; - } - void MarkAvailable(); - double ScaledPriority(); - Status Stage(StandardScheduleFunc OnSchedule); - Status Allocate(); - Status DirectAllocate(StandardScheduleFunc OnSchedule); - void RequestRemoval(); - void WaitForRemoval(); - - TritonModelInstance* triton_model_instance_; - size_t index_; - ModelContext* model_context_; - RateLimiterConfig rate_limiter_config_; - StandardStageFunc OnStage_; - StandardReleaseFunc OnRelease_; - std::atomic exec_count_; - - State state_; - bool removal_in_progress_; - std::mutex state_mtx_; - - StandardScheduleFunc OnSchedule_; - - std::condition_variable cv_; - }; - - class ScaledPriorityComparator { - public: - bool operator()(ModelInstanceContext* a, ModelInstanceContext* b) - { - return a->ScaledPriority() > b->ScaledPriority(); - } - }; - - using PriorityQueue = std::priority_queue< - ModelInstanceContext*, std::vector, - ScaledPriorityComparator>; - - // Holds the active context to a model - class ModelContext { - public: - ModelContext(); - - Status EnqueueModelInstanceRequest( - const StandardScheduleFunc& OnSchedule, - TritonModelInstance* triton_model_instance); - void AddAvailableInstance(ModelInstanceContext* instance); - void StageInstanceIfAvailable(TritonModelInstance* triton_model_instance); - void AllocateInstanceIfAvailable(); - void AddSpecificRequestQueue(); - bool ContainsPendingRequests(int32_t index); - void RequestRemoval(); - bool isRemovalInProgress() { return removal_in_progress_; } - - private: - bool removal_in_progress_; - - // Queue holding pending scheduling request - std::queue generic_sched_request_queue_; - std::vector> - specific_sched_request_queues_; - std::recursive_mutex sched_request_queue_mtx_; - - // The set of instances that are available at the moment - PriorityQueue avbl_instances_; - std::recursive_mutex avbl_instances_mtx_; - }; - - // Manages and keep track of resource allocation to the model instances. 
- class ResourceManager { - public: - static Status Create( - const ResourceMap& resource_map, - std::unique_ptr* resource_manager); - void AddModelInstance(const ModelInstanceContext* instance); - Status RemoveModelInstance(const ModelInstanceContext* instance); - Status UpdateResourceLimits(); - bool AllocateResources(const ModelInstanceContext* instance); - Status ReleaseResources(const ModelInstanceContext* instance); - - private: - ResourceManager(const ResourceMap& resource_map); - Status ValidateMaxResources(); - Status ParseAndValidateExplicitResources(); - - ResourceMap explicit_max_resources_; - - std::map model_resources_; - std::mutex model_resources_mtx_; - - ResourceMap max_resources_; - std::mutex max_resources_mtx_; - - ResourceMap allocated_resources_; - std::mutex allocated_resources_mtx_; - }; - - RateLimiter( - const bool ignore_resources_and_priority, - const ResourceMap& resource_map); - - void InitializePayloadQueues(const TritonModelInstance* instance); - Status DeferPayloadSchedule( - const StandardScheduleFunc& OnSchedule, const TritonModel* model, - TritonModelInstance* instance = nullptr); - void OnStage(ModelInstanceContext* instance_ptr); - void OnRelease(ModelInstanceContext* instance_ptr); - void AttemptAllocation(); - void SchedulePayload( - TritonModelInstance* tmi, PayloadQueue* payload_queue, - const std::shared_ptr& payload); - - bool ignore_resources_and_priority_; - - // Instance context for the models - std::map< - const TritonModel*, std::vector>> - model_instance_ctxs_; - std::mutex model_instance_ctx_mtx_; - - // Running context of the models - std::map model_contexts_; - std::mutex model_ctx_mtx_; - - // Holds the model instances that have been staged - PriorityQueue staged_instances_; - std::recursive_mutex staged_instances_mtx_; - - // Manager to keep track of the resource allocations - std::unique_ptr resource_manager_; - - // Mutex to serialize Payload [de]allocation - std::mutex payload_mu_; - - // Mutex to serialize Payload Queues deallocation - std::mutex payload_queues_mu_; - - // Keep some number of Payload objects for reuse to avoid the overhead - // of creating a Payload for every new request. - const size_t max_payload_bucket_count_; - std::vector> payload_bucket_; - std::deque> payloads_in_use_; - - struct PayloadQueue { - explicit PayloadQueue(size_t max_batch_size, uint64_t max_queue_delay_ns) - { - queue_.reset(new InstanceQueue(max_batch_size, max_queue_delay_ns)); - } - std::unique_ptr queue_; - std::map> - specific_queues_; - std::mutex mu_; - std::condition_variable cv_; - }; - std::map> payload_queues_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/repo_agent.cc b/3rdparty/core-r22.12/src/repo_agent.cc deleted file mode 100644 index c5c27e6aa3cf483b799a1332e5979dde545370bd..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/repo_agent.cc +++ /dev/null @@ -1,573 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "repo_agent.h" - -#include -#include "filesystem.h" -#include "shared_library.h" -#include "triton/common/logging.h" -#include "tritonserver_apis.h" - -// For unknown reason, windows will not export the TRITONREPOAGENT_* -// functions declared with dllexport in tritonrepoagent.h. To get -// those functions exported it is (also?) necessary to mark the -// definitions in this file with dllexport as well. -#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -namespace triton { namespace core { - -std::string -TritonRepoAgentLibraryName(const std::string& agent_name) -{ -#ifdef _WIN32 - return std::string("tritonrepoagent_") + agent_name + ".dll"; -#else - return std::string("libtritonrepoagent_") + agent_name + ".so"; -#endif -} - -std::string -TRITONREPOAGENT_ActionTypeString(const TRITONREPOAGENT_ActionType type) -{ - switch (type) { - case TRITONREPOAGENT_ACTION_LOAD: - return "TRITONREPOAGENT_ACTION_LOAD"; - case TRITONREPOAGENT_ACTION_LOAD_COMPLETE: - return "TRITONREPOAGENT_ACTION_LOAD_COMPLETE"; - case TRITONREPOAGENT_ACTION_LOAD_FAIL: - return "TRITONREPOAGENT_ACTION_LOAD_FAIL"; - case TRITONREPOAGENT_ACTION_UNLOAD: - return "TRITONREPOAGENT_ACTION_UNLOAD"; - case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: - return "TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE"; - } - return "Unknown TRITONREPOAGENT_ActionType"; -} - -std::string -TRITONREPOAGENT_ArtifactTypeString(const TRITONREPOAGENT_ArtifactType type) -{ - switch (type) { - case TRITONREPOAGENT_ARTIFACT_FILESYSTEM: - return "TRITONREPOAGENT_ARTIFACT_FILESYSTEM"; - case TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: - return "TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM"; - } - return "Unknown TRITONREPOAGENT_ArtifactType"; -} - -// -// TritonRepoAgent -// -Status -TritonRepoAgent::Create( - const std::string& name, const std::string& libpath, - std::shared_ptr* agent) -{ - std::shared_ptr lagent(new TritonRepoAgent(name)); - - { - std::unique_ptr slib; - RETURN_IF_ERROR(SharedLibrary::Acquire(&slib)); - - RETURN_IF_ERROR(slib->OpenLibraryHandle(libpath, &lagent->dlhandle_)); - RETURN_IF_ERROR(slib->GetEntrypoint( - lagent->dlhandle_, "TRITONREPOAGENT_Initialize", true /* optional */, - reinterpret_cast(&lagent->init_fn_))); - RETURN_IF_ERROR(slib->GetEntrypoint( - lagent->dlhandle_, "TRITONREPOAGENT_Finalize", true /* optional */, - reinterpret_cast(&lagent->fini_fn_))); - 
RETURN_IF_ERROR(slib->GetEntrypoint( - lagent->dlhandle_, "TRITONREPOAGENT_ModelInitialize", - true /* optional */, - reinterpret_cast(&lagent->model_init_fn_))); - RETURN_IF_ERROR(slib->GetEntrypoint( - lagent->dlhandle_, "TRITONREPOAGENT_ModelFinalize", true /* optional */, - reinterpret_cast(&lagent->model_fini_fn_))); - RETURN_IF_ERROR(slib->GetEntrypoint( - lagent->dlhandle_, "TRITONREPOAGENT_ModelAction", false /* optional */, - reinterpret_cast(&lagent->model_action_fn_))); - } - - // Initialize if needed - if (lagent->init_fn_ != nullptr) { - RETURN_IF_TRITONSERVER_ERROR(lagent->init_fn_( - reinterpret_cast(lagent.get()))); - } - - *agent = std::move(lagent); - return Status::Success; -} - -TritonRepoAgent::~TritonRepoAgent() -{ - // Finalize if needed - if (fini_fn_ != nullptr) { - auto err = fini_fn_(reinterpret_cast(this)); - if (err != nullptr) { - LOG_ERROR << "~TritonRepoAgent: " - << Status( - TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)), - TRITONSERVER_ErrorMessage(err)) - .AsString(); - TRITONSERVER_ErrorDelete(err); - }; - } - - { - std::unique_ptr slib; - LOG_STATUS_ERROR(SharedLibrary::Acquire(&slib), "~TritonRepoAgent"); - LOG_STATUS_ERROR(slib->CloseLibraryHandle(dlhandle_), "~TritonRepoAgent"); - } -} - -// -// TritonRepoAgentModel -// -Status -TritonRepoAgentModel::Create( - const TRITONREPOAGENT_ArtifactType type, const std::string& location, - const inference::ModelConfig& config, - const std::shared_ptr& agent, - const TritonRepoAgent::Parameters& agent_parameters, - std::unique_ptr* agent_model) -{ - std::unique_ptr lagent_model(new TritonRepoAgentModel( - type, location, config, agent, agent_parameters)); - if (agent->AgentModelInitFn() != nullptr) { - RETURN_IF_TRITONSERVER_ERROR(agent->AgentModelInitFn()( - reinterpret_cast(agent.get()), - reinterpret_cast(lagent_model.get()))); - } - *agent_model = std::move(lagent_model); - return Status::Success; -} - -TritonRepoAgentModel::~TritonRepoAgentModel() -{ - // Need to ensure the proper lifecycle is informed - if (action_type_set_) { - switch (current_action_type_) { - case TRITONREPOAGENT_ACTION_LOAD: - LOG_TRITONSERVER_ERROR( - agent_->AgentModelActionFn()( - reinterpret_cast(agent_.get()), - reinterpret_cast(this), - TRITONREPOAGENT_ACTION_LOAD_FAIL), - "Inform TRITONREPOAGENT_ACTION_LOAD_FAIL"); - break; - case TRITONREPOAGENT_ACTION_LOAD_COMPLETE: - LOG_TRITONSERVER_ERROR( - agent_->AgentModelActionFn()( - reinterpret_cast(agent_.get()), - reinterpret_cast(this), - TRITONREPOAGENT_ACTION_UNLOAD), - "Inform TRITONREPOAGENT_ACTION_UNLOAD"); - // Fallthough is not yet an language feature until C++17 - LOG_TRITONSERVER_ERROR( - agent_->AgentModelActionFn()( - reinterpret_cast(agent_.get()), - reinterpret_cast(this), - TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE), - "Inform TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE"); - break; - case TRITONREPOAGENT_ACTION_UNLOAD: - LOG_TRITONSERVER_ERROR( - agent_->AgentModelActionFn()( - reinterpret_cast(agent_.get()), - reinterpret_cast(this), - TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE), - "Inform TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE"); - break; - case TRITONREPOAGENT_ACTION_LOAD_FAIL: - case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: - break; - } - } - if (agent_->AgentModelFiniFn() != nullptr) { - LOG_TRITONSERVER_ERROR( - agent_->AgentModelFiniFn()( - reinterpret_cast(agent_.get()), - reinterpret_cast(this)), - "~TritonRepoAgentModel"); - } - if (!acquired_location_.empty()) { - DeleteMutableLocation(); - } -} - -Status -TritonRepoAgentModel::InvokeAgent(const 
TRITONREPOAGENT_ActionType action_type) -{ - if ((!action_type_set_) && (action_type != TRITONREPOAGENT_ACTION_LOAD)) { - return Status( - Status::Code::INTERNAL, - "Unexpected lifecycle start state " + - TRITONREPOAGENT_ActionTypeString(action_type)); - } - switch (action_type) { - case TRITONREPOAGENT_ACTION_LOAD: - if (action_type_set_) { - return Status( - Status::Code::INTERNAL, - "Unexpected lifecycle state transition from " + - TRITONREPOAGENT_ActionTypeString(current_action_type_) + - " to " + TRITONREPOAGENT_ActionTypeString(action_type)); - } - break; - case TRITONREPOAGENT_ACTION_LOAD_COMPLETE: - case TRITONREPOAGENT_ACTION_LOAD_FAIL: - if (current_action_type_ != TRITONREPOAGENT_ACTION_LOAD) { - return Status( - Status::Code::INTERNAL, - "Unexpected lifecycle state transition from " + - TRITONREPOAGENT_ActionTypeString(current_action_type_) + - " to " + TRITONREPOAGENT_ActionTypeString(action_type)); - } - break; - case TRITONREPOAGENT_ACTION_UNLOAD: - if (current_action_type_ != TRITONREPOAGENT_ACTION_LOAD_COMPLETE) { - return Status( - Status::Code::INTERNAL, - "Unexpected lifecycle state transition from " + - TRITONREPOAGENT_ActionTypeString(current_action_type_) + - " to " + TRITONREPOAGENT_ActionTypeString(action_type)); - } - break; - case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: - if (current_action_type_ != TRITONREPOAGENT_ACTION_UNLOAD) { - return Status( - Status::Code::INTERNAL, - "Unexpected lifecycle state transition from " + - TRITONREPOAGENT_ActionTypeString(current_action_type_) + - " to " + TRITONREPOAGENT_ActionTypeString(action_type)); - } - break; - } - current_action_type_ = action_type; - action_type_set_ = true; - RETURN_IF_TRITONSERVER_ERROR(agent_->AgentModelActionFn()( - reinterpret_cast(agent_.get()), - reinterpret_cast(this), action_type)); - return Status::Success; -} - -Status -TritonRepoAgentModel::SetLocation( - const TRITONREPOAGENT_ArtifactType type, const std::string& location) -{ - if (current_action_type_ != TRITONREPOAGENT_ACTION_LOAD) { - return Status( - Status::Code::INVALID_ARG, - "location can only be updated during TRITONREPOAGENT_ACTION_LOAD, " - "current action type is " + - (action_type_set_ - ? 
TRITONREPOAGENT_ActionTypeString(current_action_type_) - : "not set")); - } - type_ = type; - location_ = location; - return Status::Success; -} - -Status -TritonRepoAgentModel::Location( - TRITONREPOAGENT_ArtifactType* type, const char** location) -{ - if (location_.empty()) { - return Status( - Status::Code::INTERNAL, "Model repository location is not set"); - } - *type = type_; - *location = location_.c_str(); - return Status::Success; -} - -Status -TritonRepoAgentModel::AcquireMutableLocation( - const TRITONREPOAGENT_ArtifactType type, const char** location) -{ - if (type != TRITONREPOAGENT_ARTIFACT_FILESYSTEM) { - return Status( - Status::Code::INVALID_ARG, - "Unexpected artifact type, expects " - "'TRITONREPOAGENT_ARTIFACT_FILESYSTEM'"); - } - if (acquired_location_.empty()) { - std::string lacquired_location; - RETURN_IF_ERROR( - MakeTemporaryDirectory(FileSystemType::LOCAL, &lacquired_location)); - acquired_location_.swap(lacquired_location); - acquired_type_ = type; - } - *location = acquired_location_.c_str(); - return Status::Success; -} - -Status -TritonRepoAgentModel::DeleteMutableLocation() -{ - if (acquired_location_.empty()) { - return Status( - Status::Code::UNAVAILABLE, "No mutable location to be deleted"); - } - - auto status = DeletePath(acquired_location_); - if (!status.IsOk()) { - LOG_ERROR << "Failed to delete previously acquired location '" - << acquired_location_ << "': " << status.AsString(); - } - acquired_location_.clear(); - return Status::Success; -} - -// -// TritonRepoAgentManager -// -TritonRepoAgentManager& -TritonRepoAgentManager::Singleton() -{ - static TritonRepoAgentManager triton_repo_agent_manager; - return triton_repo_agent_manager; -} - -Status -TritonRepoAgentManager::SetGlobalSearchPath(const std::string& path) -{ - auto& singleton_manager = Singleton(); - std::lock_guard lock(singleton_manager.mu_); - singleton_manager.global_search_path_ = path; - return Status::Success; -} - -Status -TritonRepoAgentManager::CreateAgent( - const std::string& agent_name, std::shared_ptr* agent) -{ - auto& singleton_manager = Singleton(); - std::lock_guard lock(singleton_manager.mu_); - - // Get the path to the agent shared library. Search path is global - // agent directory. FIXME expose global path as Triton option - const std::vector search_paths = { - JoinPath({singleton_manager.global_search_path_, agent_name})}; - - std::string agent_libname = TritonRepoAgentLibraryName(agent_name); - std::string libpath; - for (const auto& path : search_paths) { - const auto full_path = JoinPath({path, agent_libname}); - bool exists = false; - RETURN_IF_ERROR(FileExists(full_path, &exists)); - if (exists) { - libpath = full_path; - break; - } - } - - if (libpath.empty()) { - return Status( - Status::Code::INVALID_ARG, - "unable to find '" + agent_libname + "' for repo agent '" + agent_name + - "', searched: " + singleton_manager.global_search_path_); - } - - const auto& itr = singleton_manager.agent_map_.find(libpath); - if (itr != singleton_manager.agent_map_.end()) { - // Found in map. If the weak_ptr is still valid that means that - // there are other models using the agent and we just reuse that - // same agent. If the weak_ptr is not valid then agent has been - // unloaded so we need to remove the weak_ptr from the map and - // create the agent again. 
- *agent = itr->second.lock(); - if (*agent != nullptr) { - return Status::Success; - } - - singleton_manager.agent_map_.erase(itr); - } - RETURN_IF_ERROR(TritonRepoAgent::Create(agent_name, libpath, agent)); - singleton_manager.agent_map_.insert({libpath, *agent}); - - return Status::Success; -} - -Status -TritonRepoAgentManager::AgentState( - std::unique_ptr>* agent_state) -{ - auto& singleton_manager = Singleton(); - std::lock_guard lock(singleton_manager.mu_); - - std::unique_ptr> agent_state_map( - new std::unordered_map); - for (const auto& agent_pair : singleton_manager.agent_map_) { - auto& libpath = agent_pair.first; - auto agent = agent_pair.second.lock(); - - if (agent != nullptr) { - agent_state_map->insert({agent->Name(), libpath}); - } - } - - *agent_state = std::move(agent_state_map); - - return Status::Success; -} - -extern "C" { - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ApiVersion(uint32_t* major, uint32_t* minor) -{ - *major = TRITONREPOAGENT_API_VERSION_MAJOR; - *minor = TRITONREPOAGENT_API_VERSION_MINOR; - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryLocation( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - TRITONREPOAGENT_ArtifactType* artifact_type, const char** location) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - RETURN_TRITONSERVER_ERROR_IF_ERROR(tam->Location(artifact_type, location)); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ArtifactType artifact_type, const char** location) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - RETURN_TRITONSERVER_ERROR_IF_ERROR( - tam->AcquireMutableLocation(artifact_type, location)); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryLocationRelease( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const char* location) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - RETURN_TRITONSERVER_ERROR_IF_ERROR(tam->DeleteMutableLocation()); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelRepositoryUpdate( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ArtifactType artifact_type, const char* location) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - RETURN_TRITONSERVER_ERROR_IF_ERROR(tam->SetLocation(artifact_type, location)); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelParameterCount( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - uint32_t* count) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - *count = tam->AgentParameters().size(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelParameter( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const uint32_t index, const char** parameter_name, - const char** parameter_value) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - const auto& params = tam->AgentParameters(); - if (index >= params.size()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "index out of range for model parameters"); - } - *parameter_name = params[index].first.c_str(); - *parameter_value = params[index].second.c_str(); - return nullptr; // success -} - 
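
The `TRITONREPOAGENT_ModelParameterCount` and `TRITONREPOAGENT_ModelParameter` entry points above are how an agent shared library reads the parameters attached to a model in the repository configuration. As a minimal sketch (not part of the deleted sources), a custom agent's `TRITONREPOAGENT_ModelAction` might enumerate them as follows; only the two parameter calls are taken from the implementation above, while the early return, the logging, and the rest of the body are illustrative assumptions.

```cpp
// Illustrative only: a hypothetical agent's ModelAction callback that lists
// the parameters configured for the model it is acting on.
#include <cstdint>
#include <iostream>

#include "tritonrepoagent.h"  // public TRITONREPOAGENT_* C API

extern "C" TRITONSERVER_Error*
TRITONREPOAGENT_ModelAction(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ActionType action_type)
{
  // Only inspect parameters while the model is being loaded.
  if (action_type != TRITONREPOAGENT_ACTION_LOAD) {
    return nullptr;  // success, nothing to do
  }

  uint32_t count = 0;
  if (TRITONSERVER_Error* err =
          TRITONREPOAGENT_ModelParameterCount(agent, model, &count)) {
    return err;
  }

  for (uint32_t idx = 0; idx < count; ++idx) {
    const char* name = nullptr;
    const char* value = nullptr;
    if (TRITONSERVER_Error* err = TRITONREPOAGENT_ModelParameter(
            agent, model, idx, &name, &value)) {
      return err;
    }
    std::cout << "agent parameter '" << name << "' = '" << value << "'\n";
  }
  return nullptr;  // success
}
```
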
-TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelConfig( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const uint32_t config_version, TRITONSERVER_Message** model_config) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - std::string model_config_json; - RETURN_TRITONSERVER_ERROR_IF_ERROR( - ModelConfigToJson(tam->Config(), config_version, &model_config_json)); - return TRITONSERVER_MessageNewFromSerializedJson( - model_config, model_config_json.c_str(), model_config_json.length()); -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelState(TRITONREPOAGENT_AgentModel* model, void** state) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - *state = tam->State(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_ModelSetState(TRITONREPOAGENT_AgentModel* model, void* state) -{ - TritonRepoAgentModel* tam = reinterpret_cast(model); - tam->SetState(state); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_State(TRITONREPOAGENT_Agent* agent, void** state) -{ - TritonRepoAgent* ta = reinterpret_cast(agent); - *state = ta->State(); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONREPOAGENT_SetState(TRITONREPOAGENT_Agent* agent, void* state) -{ - TritonRepoAgent* ta = reinterpret_cast(agent); - ta->SetState(state); - return nullptr; // success -} - -} // extern C - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/repo_agent.h b/3rdparty/core-r22.12/src/repo_agent.h deleted file mode 100644 index 001b6f7406cb083a4e481c7e03c0823bf98a8721..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/repo_agent.h +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
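
`TritonRepoAgent::Create` earlier in `repo_agent.cc` resolves the agent entry points from the shared library: the initialize/finalize and model initialize/finalize symbols are optional, while `TRITONREPOAGENT_ModelAction` is required. The sketch below reproduces that pattern with plain POSIX `dlopen`/`dlsym` in place of the `SharedLibrary` wrapper; the `AgentEntrypoints` struct and function names are hypothetical stand-ins, not the original implementation.

```cpp
// POSIX-only sketch of optional-vs-required entry-point resolution, mirroring
// the lookups performed in TritonRepoAgent::Create above.
#include <dlfcn.h>

#include <stdexcept>
#include <string>

namespace {

void*
GetEntrypoint(void* handle, const std::string& name, bool optional)
{
  void* fn = dlsym(handle, name.c_str());
  if (fn == nullptr && !optional) {
    throw std::runtime_error("required entrypoint '" + name + "' not found");
  }
  return fn;  // may be nullptr for optional entry points
}

}  // namespace

struct AgentEntrypoints {
  void* init_fn;          // TRITONREPOAGENT_Initialize (optional)
  void* fini_fn;          // TRITONREPOAGENT_Finalize (optional)
  void* model_init_fn;    // TRITONREPOAGENT_ModelInitialize (optional)
  void* model_fini_fn;    // TRITONREPOAGENT_ModelFinalize (optional)
  void* model_action_fn;  // TRITONREPOAGENT_ModelAction (required)
};

AgentEntrypoints
LoadAgentLibrary(const std::string& libpath)
{
  void* handle = dlopen(libpath.c_str(), RTLD_NOW | RTLD_LOCAL);
  if (handle == nullptr) {
    const char* err = dlerror();
    throw std::runtime_error(
        std::string("dlopen failed: ") + (err ? err : "unknown error"));
  }
  AgentEntrypoints ep{};
  ep.init_fn = GetEntrypoint(handle, "TRITONREPOAGENT_Initialize", true);
  ep.fini_fn = GetEntrypoint(handle, "TRITONREPOAGENT_Finalize", true);
  ep.model_init_fn =
      GetEntrypoint(handle, "TRITONREPOAGENT_ModelInitialize", true);
  ep.model_fini_fn =
      GetEntrypoint(handle, "TRITONREPOAGENT_ModelFinalize", true);
  ep.model_action_fn =
      GetEntrypoint(handle, "TRITONREPOAGENT_ModelAction", false);
  return ep;
}
```
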
-#pragma once - -#include "tritonserver_apis.h" - -#include -#include -#include -#include -#include "constants.h" -#include "model_config_utils.h" - -namespace triton { namespace core { - -std::string TritonRepoAgentLibraryName(const std::string& agent_name); - -std::string TRITONREPOAGENT_ActionTypeString( - const TRITONREPOAGENT_ActionType type); - -std::string TRITONREPOAGENT_ArtifactTypeString( - const TRITONREPOAGENT_ArtifactType type); - -class TritonRepoAgent { - public: - using Parameters = std::vector>; - typedef TRITONSERVER_Error* (*TritonRepoAgentInitFn_t)( - TRITONREPOAGENT_Agent* agent); - typedef TRITONSERVER_Error* (*TritonRepoAgentFiniFn_t)( - TRITONREPOAGENT_Agent* agent); - typedef TRITONSERVER_Error* (*TritonRepoAgentModelInitFn_t)( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model); - typedef TRITONSERVER_Error* (*TritonRepoAgentModelFiniFn_t)( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model); - typedef TRITONSERVER_Error* (*TritonRepoAgentModelActionFn_t)( - TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type); - - static Status Create( - const std::string& name, const std::string& libpath, - std::shared_ptr* agent); - ~TritonRepoAgent(); - - const std::string& Name() { return name_; } - void* State() { return state_; } - void SetState(void* state) { state_ = state; } - - TritonRepoAgentModelActionFn_t AgentModelActionFn() const - { - return model_action_fn_; - } - - TritonRepoAgentModelInitFn_t AgentModelInitFn() const - { - return model_init_fn_; - } - - TritonRepoAgentModelFiniFn_t AgentModelFiniFn() const - { - return model_fini_fn_; - } - - protected: - DISALLOW_COPY_AND_ASSIGN(TritonRepoAgent); - - TritonRepoAgent(const std::string& name) - : name_(name), state_(nullptr), dlhandle_(nullptr), init_fn_(nullptr), - fini_fn_(nullptr), model_init_fn_(nullptr), model_fini_fn_(nullptr), - model_action_fn_(nullptr) - { - } - const std::string name_; - void* state_; - - // dlopen / dlsym handles - void* dlhandle_; - TritonRepoAgentInitFn_t init_fn_; - TritonRepoAgentFiniFn_t fini_fn_; - TritonRepoAgentModelInitFn_t model_init_fn_; - TritonRepoAgentModelFiniFn_t model_fini_fn_; - TritonRepoAgentModelActionFn_t model_action_fn_; -}; - -class TritonRepoAgentModel { - public: - static Status Create( - const TRITONREPOAGENT_ArtifactType type, const std::string& location, - const inference::ModelConfig& config, - const std::shared_ptr& agent, - const TritonRepoAgent::Parameters& agent_parameters, - std::unique_ptr* agent_model); - ~TritonRepoAgentModel(); - - void* State() { return state_; } - void SetState(void* state) { state_ = state; } - - Status InvokeAgent(const TRITONREPOAGENT_ActionType action_type); - const TritonRepoAgent::Parameters& AgentParameters() - { - return agent_parameters_; - } - - Status SetLocation( - const TRITONREPOAGENT_ArtifactType type, const std::string& location); - Status Location(TRITONREPOAGENT_ArtifactType* type, const char** location); - Status AcquireMutableLocation( - const TRITONREPOAGENT_ArtifactType type, const char** location); - Status DeleteMutableLocation(); - const inference::ModelConfig Config() { return config_; } - - private: - DISALLOW_COPY_AND_ASSIGN(TritonRepoAgentModel); - - TritonRepoAgentModel( - const TRITONREPOAGENT_ArtifactType type, const std::string& location, - const inference::ModelConfig& config, - const std::shared_ptr& agent, - const TritonRepoAgent::Parameters& agent_parameters) - : state_(nullptr), config_(config), 
agent_(agent), - agent_parameters_(agent_parameters), type_(type), location_(location), - action_type_set_(false), - current_action_type_(TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE) - { - } - - void* state_; - const inference::ModelConfig config_; - const std::shared_ptr agent_; - const TritonRepoAgent::Parameters agent_parameters_; - TRITONREPOAGENT_ArtifactType type_; - std::string location_; - TRITONREPOAGENT_ArtifactType acquired_type_; - std::string acquired_location_; - bool action_type_set_; - TRITONREPOAGENT_ActionType current_action_type_; -}; - -class TritonRepoAgentManager { - public: - static Status SetGlobalSearchPath(const std::string& path); - static Status CreateAgent( - const std::string& agent_name, std::shared_ptr* agent); - - static Status AgentState( - std::unique_ptr>* - agent_state); - - private: - DISALLOW_COPY_AND_ASSIGN(TritonRepoAgentManager); - - TritonRepoAgentManager() - : global_search_path_("/opt/tritonserver/repoagents"){}; - static TritonRepoAgentManager& Singleton(); - std::mutex mu_; - std::string global_search_path_; - std::unordered_map> agent_map_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/response_allocator.h b/3rdparty/core-r22.12/src/response_allocator.h deleted file mode 100644 index 143cc7ff877cc141c1a4110ebf03fd1f2a82227e..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/response_allocator.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -// -// Implementation for TRITONSERVER_ResponseAllocator. 
-// -class ResponseAllocator { - public: - explicit ResponseAllocator( - TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, - TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, - TRITONSERVER_ResponseAllocatorStartFn_t start_fn) - : alloc_fn_(alloc_fn), buffer_attributes_fn_(nullptr), query_fn_(nullptr), - release_fn_(release_fn), start_fn_(start_fn) - { - } - - void SetQueryFunction(TRITONSERVER_ResponseAllocatorQueryFn_t query_fn) - { - query_fn_ = query_fn; - } - - void SetBufferAttributesFunction( - TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn) - { - buffer_attributes_fn_ = buffer_attributes_fn; - } - - TRITONSERVER_ResponseAllocatorAllocFn_t AllocFn() const { return alloc_fn_; } - TRITONSERVER_ResponseAllocatorBufferAttributesFn_t BufferAttributesFn() const - { - return buffer_attributes_fn_; - } - TRITONSERVER_ResponseAllocatorQueryFn_t QueryFn() const { return query_fn_; } - TRITONSERVER_ResponseAllocatorReleaseFn_t ReleaseFn() const - { - return release_fn_; - } - TRITONSERVER_ResponseAllocatorStartFn_t StartFn() const { return start_fn_; } - - private: - TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn_; - TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn_; - TRITONSERVER_ResponseAllocatorQueryFn_t query_fn_; - TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn_; - TRITONSERVER_ResponseAllocatorStartFn_t start_fn_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/response_cache.cc b/3rdparty/core-r22.12/src/response_cache.cc deleted file mode 100644 index ff5f0707accec7aaf2302503f968889b9883dba4..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/response_cache.cc +++ /dev/null @@ -1,542 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
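
The `ResponseAllocator` wrapper above only stores the caller's callbacks: the alloc, release, and start functions are fixed at construction, while the query and buffer-attributes callbacks remain unset unless the corresponding setters are called. Below is a minimal sketch of wiring in CPU-only callbacks; it assumes the callback signatures follow the public `TRITONSERVER_ResponseAllocator*Fn_t` typedefs from `tritonserver.h`, so treat the exact parameter lists as an assumption to verify against that header rather than a definitive usage.

```cpp
// Sketch: CPU-only alloc/release callbacks wrapped by the ResponseAllocator
// class declared above. Buffers are plain malloc/free.
#include <cstdlib>

#include "response_allocator.h"

namespace {

TRITONSERVER_Error*
ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  *buffer = (byte_size == 0) ? nullptr : std::malloc(byte_size);
  *buffer_userp = nullptr;
  *actual_memory_type = TRITONSERVER_MEMORY_CPU;  // always answer with CPU
  *actual_memory_type_id = 0;
  return nullptr;  // success
}

TRITONSERVER_Error*
ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  std::free(buffer);
  return nullptr;  // success
}

}  // namespace

// The start callback is optional, so nullptr is passed for it; the query and
// buffer-attributes callbacks simply stay nullptr unless the setters are used.
static triton::core::ResponseAllocator cpu_allocator(
    ResponseAlloc, ResponseRelease, nullptr /* start_fn */);
```
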
- -#include "response_cache.h" -#include "infer_stats.h" -#include "triton/common/logging.h" - -namespace { - -enum class ScopedTimerType { INSERTION, LOOKUP }; - -class ScopedTimer { - public: - explicit ScopedTimer( - triton::core::InferenceRequest& request, uint64_t& duration, - ScopedTimerType type) - : request_(request), duration_(duration), type_(type) - { - switch (type_) { - case ScopedTimerType::LOOKUP: - request_.CaptureCacheLookupStartNs(); - break; - case ScopedTimerType::INSERTION: - request_.CaptureCacheInsertionStartNs(); - break; - } - } - - ~ScopedTimer() - { - switch (type_) { - case ScopedTimerType::LOOKUP: - request_.CaptureCacheLookupEndNs(); - duration_ += - request_.CacheLookupEndNs() - request_.CacheLookupStartNs(); - break; - case ScopedTimerType::INSERTION: - request_.CaptureCacheInsertionEndNs(); - duration_ += - request_.CacheInsertionEndNs() - request_.CacheInsertionStartNs(); - break; - } - } - - private: - triton::core::InferenceRequest& request_; - uint64_t& duration_; - ScopedTimerType type_; -}; - -std::string -PointerToString(void* ptr) -{ - std::stringstream ss; - ss << ptr; - return ss.str(); -} - -} // namespace - -namespace triton { namespace core { - -Status -RequestResponseCache::Create( - uint64_t cache_size, std::unique_ptr* cache) -{ - try { - cache->reset(new RequestResponseCache(cache_size)); - } - catch (const std::exception& ex) { - return Status( - Status::Code::INTERNAL, - "Failed to initialize Response Cache: " + std::string(ex.what())); - } - - return Status::Success; -} - -RequestResponseCache::RequestResponseCache(const uint64_t size) -{ - // Allocate buffer - buffer_ = malloc(size); - // Exit early if buffer allocation failed - if (buffer_ == nullptr) { - throw std::runtime_error("failed to allocate buffer"); - } - - // Create cache as managed buffer - managed_buffer_ = boost::interprocess::managed_external_buffer( - boost::interprocess::create_only_t{}, buffer_, size); - - LOG_INFO << "Response Cache is created at '" << PointerToString(buffer_) - << "' with size " << size; -} - -RequestResponseCache::~RequestResponseCache() -{ - // Deallocate each chunk from managed buffer - for (auto& iter : cache_) { - auto& entry = iter.second; - for (auto& output : entry.outputs_) { - if (output.buffer_ != nullptr) { - managed_buffer_.deallocate(output.buffer_); - } - } - } - - // Validate we freed all underlying memory managed by cache - if (!managed_buffer_.all_memory_deallocated()) { - // Destructors can't throw exceptions - LOG_ERROR << "failed to free managed cache memory"; - } - - // Free total cache buffer - if (buffer_ != nullptr) { - free(buffer_); - } -} - -Status -RequestResponseCache::Lookup( - InferenceResponse* const response, InferenceRequest* const request) -{ - // Lock on cache lookup - std::lock_guard lk(cache_mtx_); - - if (request == nullptr) { - return Status( - Status::Code::INTERNAL, "Cache Lookup passed a nullptr request"); - } - - // Capture start latency now and end latency when timer goes out of scope - ScopedTimer timer( - *request, total_lookup_latency_ns_, ScopedTimerType::LOOKUP); - - // Hash the request and set cache key if it hasn't already been set - if (!request->CacheKeyIsSet()) { - RETURN_IF_ERROR(HashAndSet(request)); - } - const uint64_t key = request->CacheKey(); - - num_lookups_++; - LOG_VERBOSE(1) << request->LogRequest() - << "Looking up key [" + std::to_string(key) + "] in cache."; - - // Search cache for request hash key - auto iter = cache_.find(key); - if (iter == cache_.end()) { - num_misses_++; - 
LOG_VERBOSE(1) << request->LogRequest() - << "MISS for key [" + std::to_string(key) + "] in cache."; - return Status( - Status::Code::INTERNAL, - request->LogRequest() + "key not found in cache"); - } - - // If find succeeds, it's a cache hit - num_hits_++; - LOG_VERBOSE(1) << request->LogRequest() - << "HIT for key [" + std::to_string(key) + "] in cache."; - - // Populate passed-in "response" from cache entry - auto entry = iter->second; - // Build InferenceResponse from CacheEntry - RETURN_IF_ERROR(BuildInferenceResponse(entry, response)); - - // Update this key to front of LRU list - UpdateLRU(iter); - LOG_VERBOSE(1) << request->LogRequest() - << "Using cached response for key [" + std::to_string(key) + - "]."; - return Status::Success; -} - -Status -RequestResponseCache::Insert( - const InferenceResponse& response, InferenceRequest* const request) -{ - // Lock on cache insertion - std::lock_guard lk(cache_mtx_); - - if (request == nullptr) { - return Status( - Status::Code::INTERNAL, "Cache Insert passed a nullptr request"); - } - - // Capture start latency now and end latency when timer goes out of scope - ScopedTimer timer( - *request, total_insertion_latency_ns_, ScopedTimerType::INSERTION); - - // Hash the request and set cache key if it hasn't already been set - if (!request->CacheKeyIsSet()) { - RETURN_IF_ERROR(HashAndSet(request)); - } - const uint64_t key = request->CacheKey(); - - // Exit early if key already exists in cache - auto iter = cache_.find(key); - if (iter != cache_.end()) { - return Status( - Status::Code::ALREADY_EXISTS, request->LogRequest() + "key [" + - std::to_string(key) + - "] already exists in cache"); - } - - // Construct cache entry from response - auto entry = CacheEntry(); - RETURN_IF_ERROR(BuildCacheEntry(response, &entry)); - - // Insert entry into cache - LOG_VERBOSE(1) << request->LogRequest() - << "Inserting key [" + std::to_string(key) + "] into cache."; - auto cache_pair = cache_.insert({key, entry}); - // Exit early if cache insertion failed - if (!cache_pair.second) { - LOG_ERROR << request->LogRequest() << "Failed to insert key into map."; - return Status( - Status::Code::INTERNAL, - request->LogRequest() + "Cache insertion failed"); - } - // Update LRU with new cache entry - auto cache_iter = cache_pair.first; - UpdateLRU(cache_iter); - - return Status::Success; -} - -// LRU -Status -RequestResponseCache::Evict() -{ - // Lock on cache eviction - std::lock_guard lk(cache_mtx_); - - // Nothing to evict if cache is empty - if (NumEntries() == 0) { - return Status(Status::Code::INTERNAL, "Cache is empty, nothing to evict."); - } - - // Least recently used key in back of LRU list - uint64_t lru_key = lru_.back(); - LOG_VERBOSE(1) << "Evicting key [" + std::to_string(lru_key) + - "] from cache."; - - // Find cache entry for least recently used key - auto iter = cache_.find(lru_key); - // Error check if key isn't in cache, but this shouldn't happen in evict - // and probably indicates a bug - if (iter == cache_.end()) { - return Status( - Status::Code::INTERNAL, - "key [" + std::to_string(lru_key) + - "] not found in cache during eviction: this indicates a bug in the " - "code"); - } - // Get size of cache entry being evicted to update available size - auto entry = iter->second; - // Free managed memory used in cache entry's outputs - for (auto& output : entry.outputs_) { - // Lock on buffer deallocation - std::lock_guard lk(buffer_mtx_); - managed_buffer_.deallocate(output.buffer_); - } - - // Remove LRU entry from cache - cache_.erase(lru_key); - 
// Remove LRU key from LRU list - lru_.pop_back(); - // Increment number of evictions - num_evictions_++; - - return Status::Success; -} - -// Helpers -void -RequestResponseCache::UpdateLRU( - std::unordered_map::iterator& cache_iter) -{ - // Lock on cache update - std::lock_guard lk(cache_mtx_); - - const auto& key = cache_iter->first; - auto& cache_entry = cache_iter->second; - // Remove key from LRU list if it was already in there - auto lru_iter = std::find(lru_.begin(), lru_.end(), key); - if (lru_iter != lru_.end()) { - lru_.erase(lru_iter); - } - // Add key to front of LRU list since it's most recently used - lru_.push_front(key); - // Set CacheEntry LRU iterator to new LRU key location - cache_entry.lru_iter_ = lru_.begin(); -} - -Status -RequestResponseCache::BuildCacheEntry( - const InferenceResponse& response, CacheEntry* const entry) -{ - // Build cache entry data from response outputs - for (const auto& response_output : response.Outputs()) { - auto cache_output = Output(); - - // Fetch output buffer details - const void* response_buffer = nullptr; - size_t response_byte_size = 0; - TRITONSERVER_MemoryType response_memory_type; - int64_t response_memory_type_id; - void* userp; - RETURN_IF_ERROR(response_output.DataBuffer( - &response_buffer, &response_byte_size, &response_memory_type, - &response_memory_type_id, &userp)); - - // TODO: Handle other memory types - if (response_memory_type != TRITONSERVER_MEMORY_CPU && - response_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { - return Status( - Status::Code::INTERNAL, - "Only input buffers in CPU memory are allowed in cache currently"); - } - - // Exit early if response buffer from output is invalid - if (response_buffer == nullptr) { - return Status( - Status::Code::INTERNAL, "Response buffer from output was nullptr"); - } - - // Lock on managed buffer references - { - std::lock_guard lk(buffer_mtx_); - - // Exit early if cache entry will be larger than available cache size - if (response_byte_size > managed_buffer_.get_size()) { - return Status( - Status::Code::INTERNAL, - "Cache entry is larger than total cache size"); - } - - // If cache doesn't have enough space, evict until enough space available - // NOTE: FreeBytes() doesn't account for allocator overhead so allocation - // may fail even if response_byte_size is less than FreeBytes() - while (response_byte_size > FreeBytes()) { - LOG_VERBOSE(1) << "EVICT: Response larger than remaining available " - "memory, attempting to evict from cache."; - RETURN_IF_ERROR(Evict()); - } - - // Attempt to allocate buffer until success or eviction from cache fails - while (cache_output.buffer_ == nullptr) { - // Allocate buffer for response output in cache entry - cache_output.buffer_ = - managed_buffer_.allocate(response_byte_size, std::nothrow_t{}); - // Attempt to evict if allocation fails - if (cache_output.buffer_ == nullptr) { - LOG_VERBOSE(1) << "FAILED to allocate buffer in cache. 
Attempting to " - "evict an entry."; - // Exit out if Eviction fails - RETURN_IF_ERROR(Evict()); - } - } - - // Copy data from response buffer to cache entry output buffer - // TODO: Handle other memory types - std::memcpy(cache_output.buffer_, response_buffer, response_byte_size); - - // Set output metadata - cache_output.name_ = response_output.Name(); - cache_output.dtype_ = response_output.DType(); - cache_output.shape_ = response_output.Shape(); - cache_output.buffer_size_ = static_cast(response_byte_size); - } - - // Add each output to cache entry - entry->outputs_.push_back(cache_output); - } - - return Status::Success; -} - - -Status -RequestResponseCache::BuildInferenceResponse( - const CacheEntry& entry, InferenceResponse* const response) -{ - if (response == nullptr) { - return Status(Status::Code::INTERNAL, "invalid response ptr passed in"); - } - - // Lock on cache references - { - std::lock_guard lk(cache_mtx_); - - // Inference response outputs should be empty so we can append to them - if (response->Outputs().size() != 0) { - return Status( - Status::Code::INTERNAL, - "InferenceResponse already contains some outputs"); - } - - for (auto& cache_output : entry.outputs_) { - InferenceResponse::Output* response_output = nullptr; - RETURN_IF_ERROR(response->AddOutput( - cache_output.name_, cache_output.dtype_, cache_output.shape_, - &response_output)); - - if (response_output == nullptr) { - return Status( - Status::Code::INTERNAL, - "InferenceResponse::Output pointer as nullptr"); - } - - TRITONSERVER_MemoryType memory_type = TRITONSERVER_MEMORY_CPU; - int64_t memory_type_id = 0; - - // Allocate buffer for inference response - void* buffer; - RETURN_IF_ERROR(response_output->AllocateDataBuffer( - &buffer, cache_output.buffer_size_, &memory_type, &memory_type_id)); - - // TODO: Handle other memory types - if (memory_type != TRITONSERVER_MEMORY_CPU && - memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { - return Status( - Status::Code::INTERNAL, - "Only input buffers in CPU memory are allowed in cache currently"); - } - - if (buffer == nullptr) { - return Status( - Status::Code::INTERNAL, "failed to allocate buffer for output '" + - cache_output.name_ + "'"); - } - // Copy cached output buffer to allocated response output buffer - std::memcpy(buffer, cache_output.buffer_, cache_output.buffer_size_); - - // TODO: Add field to InferenceResponse to indicate this was from cache - // response.cached = true; - } - } - - return Status::Success; -} - -Status -RequestResponseCache::HashInputBuffers( - const InferenceRequest::Input* input, size_t* seed) -{ - // Iterate over each data buffer in input in case of non-contiguous memory - for (size_t idx = 0; idx < input->DataBufferCount(); ++idx) { - const void* src_buffer; - size_t src_byte_size; - TRITONSERVER_MemoryType src_memory_type; - int64_t src_memory_type_id; - - RETURN_IF_ERROR(input->DataBuffer( - idx, &src_buffer, &src_byte_size, &src_memory_type, - &src_memory_type_id)); - - // TODO: Handle other memory types - if (src_memory_type != TRITONSERVER_MEMORY_CPU && - src_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { - return Status( - Status::Code::INTERNAL, - "Only input buffers in CPU memory are allowed in cache currently"); - } - - // Add each byte of input buffer chunk to hash - const unsigned char* tmp = static_cast(src_buffer); - for (uint64_t byte = 0; byte < src_byte_size; byte++) { - boost::hash_combine(*seed, tmp[byte]); - } - } - - return Status::Success; -} - - -Status -RequestResponseCache::HashInputs(const 
InferenceRequest& request, size_t* seed) -{ - const auto& inputs = request.ImmutableInputs(); - // Convert inputs to ordered map for consistency in hashing - // inputs sorted by key (input) name - std::map ordered_inputs( - inputs.begin(), inputs.end()); - for (const auto& input : ordered_inputs) { - // Add input name to hash - boost::hash_combine(*seed, input.second->Name()); - // Fetch input buffer for hashing raw data - RETURN_IF_ERROR(HashInputBuffers(input.second, seed)); - } - - return Status::Success; -} - - -Status -RequestResponseCache::Hash(const InferenceRequest& request, uint64_t* key) -{ - std::size_t seed = 0; - // Add request model name to hash - boost::hash_combine(seed, request.ModelName()); - // Add request model version to hash - boost::hash_combine(seed, request.ActualModelVersion()); - RETURN_IF_ERROR(HashInputs(request, &seed)); - *key = static_cast(seed); - return Status::Success; -} - -Status -RequestResponseCache::HashAndSet(InferenceRequest* const request) -{ - uint64_t key = 0; - RETURN_IF_ERROR(Hash(*request, &key)); - request->SetCacheKey(key); - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/response_cache.h b/3rdparty/core-r22.12/src/response_cache.h deleted file mode 100644 index 6c39655e67d392c2b1541fe15de3e983b1a70c85..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/response_cache.h +++ /dev/null @@ -1,198 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
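> Editorial note: the hashing helpers above reduce a request to a single 64-bit cache key by folding the model name, model version, input names, and raw input bytes into one seed with `boost::hash_combine`. A rough stand-alone approximation follows; `HashRequest` is hypothetical and simplified to plain byte vectors rather than Triton input objects.

```cpp
#include <boost/functional/hash.hpp>
#include <cstdint>
#include <map>
#include <string>
#include <vector>

uint64_t HashRequest(
    const std::string& model_name, int64_t model_version,
    const std::map<std::string, std::vector<unsigned char>>& inputs)
{
  std::size_t seed = 0;
  boost::hash_combine(seed, model_name);
  boost::hash_combine(seed, model_version);
  // std::map keeps the inputs ordered by name, so the resulting key is
  // stable regardless of the order in which inputs were attached.
  for (const auto& [name, bytes] : inputs) {
    boost::hash_combine(seed, name);
    for (unsigned char b : bytes) {
      boost::hash_combine(seed, b);
    }
  }
  return static_cast<uint64_t>(seed);
}
```

The deleted `HashInputs` achieves the same ordering guarantee by copying the request's inputs into an ordered map before hashing each buffer byte by byte.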
- -#pragma once - -#include -#include -#include - -#include "infer_request.h" -#include "infer_response.h" -#include "model.h" -#include "status.h" - -#include -#include - -namespace triton { namespace core { - -// Assuming CPU memory only for now -struct Output { - // Output tensor data buffer - void* buffer_; - // Size of "buffer" above - uint64_t buffer_size_ = 0; - // Name of the output - std::string name_; - // Datatype of the output - inference::DataType dtype_; - // Shape of the output - std::vector shape_; -}; - -struct CacheEntry { - explicit CacheEntry() {} - // Point to key in LRU list for maintaining LRU order - std::list::iterator lru_iter_; - // each output buffer = managed_buffer.allocate(size, ...) - std::vector outputs_; -}; - -class RequestResponseCache { - public: - ~RequestResponseCache(); - // Create the request/response cache object - static Status Create( - uint64_t cache_size, std::unique_ptr* cache); - // Hash inference request for cache access and store it in "request" object. - // This will also be called internally in Lookup/Insert if the request hasn't - // already stored it's hash. It is up to the user to update the hash in the - // request if modifying any hashed fields of the request object after storing. - // Return Status object indicating success or failure. - Status HashAndSet(InferenceRequest* const request); - - // Lookup 'request' hash in cache and return the inference response in - // 'response' on cache hit or nullptr on cache miss - // Return Status object indicating success or failure. - Status Lookup( - InferenceResponse* const response, InferenceRequest* const request); - // Insert response into cache, evict entries to make space if necessary - // Return Status object indicating success or failure. - Status Insert( - const InferenceResponse& response, InferenceRequest* const request); - // Evict entry from cache based on policy - // Return Status object indicating success or failure. 
- Status Evict(); - // Returns number of items in cache - size_t NumEntries() - { - std::lock_guard lk(cache_mtx_); - return cache_.size(); - } - // Returns number of items evicted in cache lifespan - size_t NumEvictions() - { - std::lock_guard lk(cache_mtx_); - return num_evictions_; - } - // Returns number of lookups in cache lifespan, should sum to hits + misses - size_t NumLookups() - { - std::lock_guard lk(cache_mtx_); - return num_lookups_; - } - // Returns number of cache hits in cache lifespan - size_t NumHits() - { - std::lock_guard lk(cache_mtx_); - return num_hits_; - } - // Returns number of cache hits in cache lifespan - size_t NumMisses() - { - std::lock_guard lk(cache_mtx_); - return num_misses_; - } - // Returns the total lookup latency (nanoseconds) of all lookups in cache - // lifespan - uint64_t TotalLookupLatencyNs() - { - std::lock_guard lk(cache_mtx_); - return total_lookup_latency_ns_; - } - - uint64_t TotalInsertionLatencyNs() - { - std::lock_guard lk(cache_mtx_); - return total_insertion_latency_ns_; - } - - // Returns total number of bytes allocated for cache - size_t TotalBytes() - { - std::lock_guard lk(buffer_mtx_); - return managed_buffer_.get_size(); - } - // Returns number of free bytes in cache - size_t FreeBytes() - { - std::lock_guard lk(buffer_mtx_); - return managed_buffer_.get_free_memory(); - } - // Returns number of bytes in use by cache - size_t AllocatedBytes() - { - std::lock_guard lk(buffer_mtx_); - return managed_buffer_.get_size() - managed_buffer_.get_free_memory(); - } - // Returns fraction of bytes allocated over total cache size between [0, 1] - double TotalUtilization() - { - std::lock_guard lk(buffer_mtx_); - return static_cast(AllocatedBytes()) / - static_cast(TotalBytes()); - } - - private: - explicit RequestResponseCache(const uint64_t cache_size); - // Update LRU ordering on lookup - void UpdateLRU(std::unordered_map::iterator&); - // Build CacheEntry from InferenceResponse - Status BuildCacheEntry( - const InferenceResponse& response, CacheEntry* const entry); - // Build InferenceResponse from CacheEntry - Status BuildInferenceResponse( - const CacheEntry& entry, InferenceResponse* const response); - // Helper function to hash data buffers used by "input" - Status HashInputBuffers(const InferenceRequest::Input* input, size_t* seed); - // Helper function to hash each input in "request" - Status HashInputs(const InferenceRequest& request, size_t* seed); - // Helper function to hash request and store it in "key" - Status Hash(const InferenceRequest& request, uint64_t* key); - - // Cache buffer - void* buffer_; - // Managed buffer - boost::interprocess::managed_external_buffer managed_buffer_; - // key -> CacheEntry containing values and list iterator for LRU management - std::unordered_map cache_; - // List of keys sorted from most to least recently used - std::list lru_; - // Cache metrics - size_t num_evictions_ = 0; - size_t num_lookups_ = 0; - size_t num_hits_ = 0; - size_t num_misses_ = 0; - uint64_t total_lookup_latency_ns_ = 0; - uint64_t total_insertion_latency_ns_ = 0; - // Mutex for buffer synchronization - std::recursive_mutex buffer_mtx_; - // Mutex for cache synchronization - std::recursive_mutex cache_mtx_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/scheduler.h b/3rdparty/core-r22.12/src/scheduler.h deleted file mode 100644 index 7cc9142c8a7fc4a1cf86c352984493df7e1b3923..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/scheduler.h +++ /dev/null @@ -1,80 +0,0 @@ -// 
Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "infer_request.h" -#include "status.h" - -namespace triton { namespace core { - -// Scheduler interface. -class Scheduler { - public: - virtual ~Scheduler() {} - - // The prototype for the initialization function that will be called - // by the "standard" schedulers created based on a model's - // scheduling_choice settings. The init function is called once by - // the runner that will later execute requests for 'runner_idx'. A - // non-OK error status indicates an initialization error that - // prevents scheduler from using the runner. - using StandardInitFunc = std::function; - - // The prototype for the warmup function that will be called by the - // "standard" schedulers created based on a model's - // scheduling_choice settings. The warmup function is called once by - // the runner that will later execute requests for 'runner_idx'. A - // non-OK error status indicates an error that prevents scheduler - // from sending warmup requests to the runner. - using StandardWarmupFunc = std::function; - - // The prototype for the run function that will be called by the - // "standard" schedulers created based on a model's - // scheduling_choice settings. The run function must accept a - // 'runner_idx' indicating which runner should execute the - // 'requests'. Ownership of the 'requests' is transferred to the - // runner which is responsible for generating responses and - // releasing the requests. - using StandardRunFunc = std::function>&& requests)>; - - // Enqueue a request with the scheduler. If Status::Success is returned - // then the backend has taken ownership of the request object and so - // 'request' will be nullptr. If non-success is returned then the - // caller still retains ownership of 'request'. - virtual Status Enqueue(std::unique_ptr& request) = 0; - - // Return the number of in-flight inferences tracked by the scheduler. 
- virtual size_t InflightInferenceCount() = 0; - - // Instruct the scheduler to stop processing future requests unless they are - // considered as in-flight. - virtual void Stop() = 0; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/scheduler_utils.cc b/3rdparty/core-r22.12/src/scheduler_utils.cc deleted file mode 100644 index f3a7e243744988a3e01db7ccffd822e19d214983..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/scheduler_utils.cc +++ /dev/null @@ -1,423 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "scheduler_utils.h" - -#include -#include "constants.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -Status -RequiredEqualInputs::Initialize( - const std::unique_ptr& request, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input) -{ - has_optional_input_ = has_optional_input; - required_inputs_.clear(); - - for (const auto& pr : request->ImmutableInputs()) { - const InferenceRequest::Input* input = pr.second; - const auto itr = enforce_equal_shape_tensors.find(input->Name()); - if (itr != enforce_equal_shape_tensors.end()) { - required_inputs_.emplace( - std::piecewise_construct, std::forward_as_tuple(input->Name()), - std::forward_as_tuple(input, itr->second)); - } - // When the model has optional inputs, overload 'required_inputs_' - // to track the inputs involved in the batch - else if (has_optional_input) { - required_inputs_.emplace( - std::piecewise_construct, std::forward_as_tuple(input->Name()), - std::forward_as_tuple(nullptr, false)); - } - } - - init_ = true; - return Status::Success; -} - -bool -RequiredEqualInputs::HasEqualInputs( - const std::unique_ptr& request) -{ - // If current request has different number of inputs, then dynamic batching - // shouldn't be applied. 
- if (has_optional_input_ && - (request->ImmutableInputs().size() != required_inputs_.size())) { - return false; - } - for (const auto& pr : request->ImmutableInputs()) { - const InferenceRequest::Input* input = pr.second; - const auto itr = required_inputs_.find(input->Name()); - if (itr != required_inputs_.end()) { - if (itr->second.first != nullptr) { - // Make sure shape of input tensors is equal. - if (!triton::common::CompareDims( - itr->second.first->Shape(), input->Shape())) { - return false; - } - - // If necessary compare the contents as well... - if (itr->second.second) { - const auto& d1 = itr->second.first->Data(); - const auto& d2 = input->Data(); - - // For now being conservative and assuming that content - // comparison is for shape tensors which are likely to always - // be in a single buffer. - if ((d1->BufferCount() != 1) || (d2->BufferCount() != 1)) { - return false; - } - - size_t d1_byte_size, d2_byte_size; - TRITONSERVER_MemoryType d1_memory_type, d2_memory_type; - int64_t d1_memory_id, d2_memory_id; - const char* d1_buffer = d1->BufferAt( - 0 /* idx */, &d1_byte_size, &d1_memory_type, &d1_memory_id); - const char* d2_buffer = d2->BufferAt( - 0 /* idx */, &d2_byte_size, &d2_memory_type, &d2_memory_id); - - // Tensor must be same size and in in CPU memory so that it - // can be easily compared. If not return false conservatively. - if ((d1_byte_size != d2_byte_size) || (d1_buffer == nullptr) || - (d2_buffer == nullptr) || - (d1_memory_type == TRITONSERVER_MEMORY_GPU) || - (d2_memory_type == TRITONSERVER_MEMORY_GPU)) { - return false; - } - - if (strncmp(d1_buffer, d2_buffer, d1_byte_size) != 0) { - return false; - } - } - } - } else if (has_optional_input_) { - // If the model has optional inputs, the current request must contains all - // inputs that in the first request (tracked in 'required_inputs_'). 
- return false; - } - } - - return true; -} - -Status -PriorityQueue::PolicyQueue::Enqueue(std::unique_ptr& request) -{ - if ((max_queue_size_ != 0) && (Size() >= max_queue_size_)) { - return Status( - Status::Code::UNAVAILABLE, - request->LogRequest() + "Exceeds maximum queue size"); - } - - queue_.emplace_back(std::move(request)); - auto timeout_us = default_timeout_us_; - if (allow_timeout_override_) { - auto override_timeout_us = queue_.back()->TimeoutMicroseconds(); - if (override_timeout_us != 0 && override_timeout_us < timeout_us) { - timeout_us = override_timeout_us; - } - } - if (timeout_us != 0) { - timeout_timestamp_ns_.emplace_back( - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count() + - timeout_us * 1000); - } else { - timeout_timestamp_ns_.emplace_back(0); - } - - return Status::Success; -} - -Status -PriorityQueue::PolicyQueue::Dequeue(std::unique_ptr* request) -{ - if (!queue_.empty()) { - *request = std::move(queue_.front()); - queue_.pop_front(); - timeout_timestamp_ns_.pop_front(); - } else { - *request = std::move(delayed_queue_.front()); - delayed_queue_.pop_front(); - } - - return Status::Success; -} - -bool -PriorityQueue::PolicyQueue::ApplyPolicy( - size_t idx, size_t* rejected_count, size_t* rejected_batch_size) -{ - uint64_t now_nanoseconds = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - if (idx < queue_.size()) { - size_t curr_idx = idx; - while (curr_idx < queue_.size()) { - if ((timeout_timestamp_ns_[curr_idx] != 0) && - (now_nanoseconds > timeout_timestamp_ns_[curr_idx])) { - if (timeout_action_ == inference::ModelQueuePolicy::DELAY) { - delayed_queue_.emplace_back(std::move(queue_[curr_idx])); - } else { - rejected_queue_.emplace_back(std::move(queue_[curr_idx])); - *rejected_count += 1; - *rejected_batch_size += - std::max(1U, rejected_queue_.back()->BatchSize()); - } - curr_idx++; - } else { - break; - } - } - - // Use range erasure on deque as all erasure functions are linear, - // this implies in the edge case where this function is always called on - // 'bad' index can be O(n^2). However, for data structures that are O(1) - // erasure, the traversal may not be as efficient due to cache miss - // (elements not stored contiguously). - queue_.erase(queue_.begin() + idx, queue_.begin() + curr_idx); - timeout_timestamp_ns_.erase( - timeout_timestamp_ns_.begin() + idx, - timeout_timestamp_ns_.begin() + curr_idx); - - // Current idx is pointing to an item with unexpired timeout - if (idx < queue_.size()) { - return true; - } - } - // At this point, idx is pointing to an item with expired timeout. - // If the item is in delayed queue, then return true. Otherwise, false - // meaning the queue has no item with this 'idx'. 
- return ((idx - queue_.size()) < delayed_queue_.size()); -} - -void -PriorityQueue::PolicyQueue::ReleaseRejectedQueue( - std::deque>* requests) -{ - rejected_queue_.swap(*requests); -} - -const std::unique_ptr& -PriorityQueue::PolicyQueue::At(size_t idx) const -{ - if (idx < queue_.size()) { - return queue_[idx]; - } else { - return delayed_queue_[idx - queue_.size()]; - } -} - -uint64_t -PriorityQueue::PolicyQueue::TimeoutAt(size_t idx) -{ - if (idx < queue_.size()) { - return timeout_timestamp_ns_[idx]; - } else { - return 0; - } -} - -PriorityQueue::PriorityQueue() - : size_(0), front_priority_level_(0), last_priority_level_(0) -{ - inference::ModelQueuePolicy default_policy; - queues_.emplace(0, PolicyQueue(default_policy)); - front_priority_level_ = queues_.begin()->first; - ResetCursor(); -} - -PriorityQueue::PriorityQueue( - const inference::ModelQueuePolicy& default_queue_policy, - uint32_t priority_levels, const ModelQueuePolicyMap queue_policy_map) - : size_(0), last_priority_level_(priority_levels) -{ - if (priority_levels == 0) { - queues_.emplace(0, PolicyQueue(default_queue_policy)); - } else { - for (uint32_t level = 1; level <= priority_levels; level++) { - auto it = queue_policy_map.find(level); - if (it == queue_policy_map.end()) { - queues_.emplace(level, PolicyQueue(default_queue_policy)); - } else { - queues_.emplace(level, PolicyQueue(it->second)); - } - } - } - front_priority_level_ = queues_.begin()->first; - ResetCursor(); -} - -Status -PriorityQueue::Enqueue( - uint32_t priority_level, std::unique_ptr& request) -{ - auto status = queues_[priority_level].Enqueue(request); - if (status.IsOk()) { - size_++; - front_priority_level_ = std::min(front_priority_level_, priority_level); - // Invalidate the pending batch cursor if the enqueued item is placed - // within the pending batch. At the same priority level the request is - // guaranteed to be after pending batch if the batch hasn't reached - // delayed queue. - if ((priority_level < pending_cursor_.curr_it_->first) || - ((priority_level == pending_cursor_.curr_it_->first) && - (pending_cursor_.at_delayed_queue_))) { - pending_cursor_.valid_ = false; - } - } - - return status; -} - -Status -PriorityQueue::Dequeue(std::unique_ptr* request) -{ - pending_cursor_.valid_ = false; - while (true) { - if (!queues_[front_priority_level_].Empty()) { - RETURN_IF_ERROR(queues_[front_priority_level_].Dequeue(request)); - size_--; - return Status::Success; - } else if (front_priority_level_ != last_priority_level_) { - front_priority_level_++; - continue; - } - - // Control reach here if the queue for last priority level is also - // empty, then return error below. 
- break; - } - - return Status( - Status::Code::UNAVAILABLE, - (*request)->LogRequest() + "dequeue on empty queue"); -} - -void -PriorityQueue::ReleaseRejectedRequests( - std::shared_ptr>>>* - requests) -{ - auto res = std::make_shared< - std::vector>>>( - queues_.size()); - size_t idx = 0; - for (auto& queue : queues_) { - queue.second.ReleaseRejectedQueue(&((*res)[idx])); - idx++; - } - - requests->swap(res); -} - -bool -PriorityQueue::IsCursorValid() -{ - if (pending_cursor_.valid_) { - return (uint64_t)std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count() < pending_cursor_.pending_batch_closest_timeout_ns_; - } - return false; -} - -PriorityQueue::Cursor::Cursor(PriorityQueues::iterator start_it) - : curr_it_(start_it), queue_idx_(0), at_delayed_queue_(false), - pending_batch_closest_timeout_ns_(0), - pending_batch_oldest_enqueue_time_ns_(0), pending_batch_count_(0), - valid_(true) -{ -} - -size_t -PriorityQueue::ApplyPolicyAtCursor() -{ - size_t rejected_batch_size = 0; - size_t rejected_count = 0; - while (pending_cursor_.curr_it_ != queues_.end()) { - if (!(pending_cursor_.curr_it_->second.ApplyPolicy( - pending_cursor_.queue_idx_, &rejected_count, - &rejected_batch_size))) { - if (size_ > pending_cursor_.pending_batch_count_ + rejected_count) { - pending_cursor_.curr_it_++; - pending_cursor_.queue_idx_ = 0; - continue; - } - } - // Control reach here if the cursor points to a request that is candidate - // for pending batch, or if all requests are in pending batch. - break; - } - size_ -= rejected_count; - return rejected_batch_size; -} - -void -PriorityQueue::AdvanceCursor() -{ - if (pending_cursor_.pending_batch_count_ >= size_) { - return; - } - - const auto& timeout_ns = - pending_cursor_.curr_it_->second.TimeoutAt(pending_cursor_.queue_idx_); - if (timeout_ns != 0) { - if (pending_cursor_.pending_batch_closest_timeout_ns_ != 0) { - pending_cursor_.pending_batch_closest_timeout_ns_ = std::min( - pending_cursor_.pending_batch_closest_timeout_ns_, timeout_ns); - } else { - pending_cursor_.pending_batch_closest_timeout_ns_ = timeout_ns; - } - } - - uint64_t curr_enqueue_time_ns = - pending_cursor_.curr_it_->second.At(pending_cursor_.queue_idx_) - ->BatcherStartNs(); - if (pending_cursor_.pending_batch_oldest_enqueue_time_ns_ != 0) { - pending_cursor_.pending_batch_oldest_enqueue_time_ns_ = std::min( - pending_cursor_.pending_batch_oldest_enqueue_time_ns_, - curr_enqueue_time_ns); - } else { - pending_cursor_.pending_batch_oldest_enqueue_time_ns_ = - curr_enqueue_time_ns; - } - ++pending_cursor_.queue_idx_; - ++pending_cursor_.pending_batch_count_; - // pending batch includes delayed request if (queue_idx_ - 1) points to - // delayed queue. - pending_cursor_.at_delayed_queue_ = - (pending_cursor_.queue_idx_ > - pending_cursor_.curr_it_->second.UnexpiredSize()); -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/scheduler_utils.h b/3rdparty/core-r22.12/src/scheduler_utils.h deleted file mode 100644 index 1790f369c24281588a30955807abdad351ca2092..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/scheduler_utils.h +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include "scheduler.h" - -namespace triton { namespace core { - -struct RequiredEqualInputs { - public: - RequiredEqualInputs() : init_(false), has_optional_input_(false) {} - Status Initialize( - const std::unique_ptr& request, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input); - bool HasEqualInputs(const std::unique_ptr& request); - bool Initialized() { return init_; }; - - private: - bool init_; - bool has_optional_input_; - // A collection of inputs in the request, an nullptr for - // InferenceRequest::Input indicates that the inputs doesn't require - // equality check - std::unordered_map< - std::string, - std::pair> - required_inputs_; -}; - -// -// PriorityQueue -// -using ModelQueuePolicyMap = ::google::protobuf::Map< - ::google::protobuf::uint32, inference::ModelQueuePolicy>; - -class PriorityQueue { - public: - // Construct a queue with no priority level with default queue policy, - // which will behave the same as regular queue. - PriorityQueue(); - - // Construct a queue with 'priority_levels', the priority starts from 1. - // Different priority level may follow different queue policies given by - // 'queue_policy_map', otherwise, the 'default_queue_policy' will be used. - PriorityQueue( - const inference::ModelQueuePolicy& default_queue_policy, - uint32_t priority_levels, const ModelQueuePolicyMap queue_policy_map); - - // Enqueue a request with priority set to 'priority_level'. If - // Status::Success is returned then the queue has taken ownership of - // the request object and so 'request' will be nullptr. If - // non-success is returned then the caller still retains ownership - // of 'request'. - Status Enqueue( - uint32_t priority_level, std::unique_ptr& request); - - // Dequeue the request at the front of the queue. - Status Dequeue(std::unique_ptr* request); - - // Retrieve the requests that are rejected based on the queue policies. - void ReleaseRejectedRequests( - std::shared_ptr< - std::vector>>>* - requests); - - // Return the number of requests in the queue, rejected requests are - // not included. - size_t Size() { return size_; } - - // Is the queue is empty? Rejected requests are not included. 
- bool Empty() { return Size() == 0; } - - // Reset the cursor such that it is representing an empty pending batch. - void ResetCursor() { pending_cursor_ = Cursor(queues_.begin()); } - - // Record the current cursor. The cursor can be restored to recorded state - // by invoking SetCursorToMark(). Note that Enqueue(), Dequeue(), and - // ResetCursor() will invalidate the marker, it is the function caller's - // responsibility to ensure the marker is valid before calling - // SetCursorToMark(). - void MarkCursor() { current_mark_ = pending_cursor_; } - - // Apply the queue policy and alter the underlying queue accordingly. After - // the function returns, the cursor may be at its end to indicate that - // there no request after the pending batch. - // Returns the total batch size of the newly rejected requests. - size_t ApplyPolicyAtCursor(); - - // Return the request at the cursor. - const std::unique_ptr& RequestAtCursor() - { - return pending_cursor_.curr_it_->second.At(pending_cursor_.queue_idx_); - } - - // Advance the cursor for pending batch. This function will not trigger the - // queue policy. No effect if the cursor already reach the end of the queue. - void AdvanceCursor(); - - // Whether the cursor reaches its end, - bool CursorEnd() { return pending_cursor_.pending_batch_count_ == size_; } - - // Restore the cursor state to the marker. - void SetCursorToMark() { pending_cursor_ = current_mark_; } - - // Whether the cursor is still valid. The cursor is valid only if the pending - // batch is unchanged. - bool IsCursorValid(); - - // Return the oldest queued time of requests in pending batch. - uint64_t OldestEnqueueTime() - { - return pending_cursor_.pending_batch_oldest_enqueue_time_ns_; - } - - // Return the closest timeout of requests in pending batch. - uint64_t ClosestTimeout() - { - return pending_cursor_.pending_batch_closest_timeout_ns_; - } - - // Return the number of requests in pending batch. - size_t PendingBatchCount() { return pending_cursor_.pending_batch_count_; } - - private: - class PolicyQueue { - public: - // Construct a policy queue with default policy, which will behave the same - // as regular queue. - PolicyQueue() - : timeout_action_(inference::ModelQueuePolicy::REJECT), - default_timeout_us_(0), allow_timeout_override_(false), - max_queue_size_(0) - { - } - - // Construct a policy queue with given 'policy'. - PolicyQueue(const inference::ModelQueuePolicy& policy) - : timeout_action_(policy.timeout_action()), - default_timeout_us_(policy.default_timeout_microseconds()), - allow_timeout_override_(policy.allow_timeout_override()), - max_queue_size_(policy.max_queue_size()) - { - } - - // Enqueue a request and set up its timeout accordingly. If - // Status::Success is returned then the queue has taken ownership - // of the request object and so 'request' will be nullptr. If - // non-success is returned then the caller still retains ownership - // of 'request'. - Status Enqueue(std::unique_ptr& request); - - // Dequeue the request at the front of the queue. - Status Dequeue(std::unique_ptr* request); - - // Apply the queue policy to the request at 'idx'. - // 'rejected_count' will be incremented by the number of the newly rejected - // requets after applying the policy. - // 'rejected_batch_size' will be incremented by the total batch size of the - // newly rejected requests after applying the policy. - // Return true if the 'idx' still points to a request after applying the - // policy, false otherwise. 
- bool ApplyPolicy( - size_t idx, size_t* rejected_count, size_t* rejected_batch_size); - - // Return the rejected requests held by the queue. - void ReleaseRejectedQueue( - std::deque>* requests); - - // Return the request at 'idx'. - const std::unique_ptr& At(size_t idx) const; - - // Return the timeout timestamp of the request at 'idx', in ns. A value of 0 - // indicates that the request doesn't specify a timeout. - uint64_t TimeoutAt(size_t idx); - - // Return whether the queue is empty, rejected requests are not included. - bool Empty() { return Size() == 0; } - - // Return the number of requests in the queue, rejected requests are not - // included. - size_t Size() { return queue_.size() + delayed_queue_.size(); } - - // Return the number of unexpired requests in the queue - size_t UnexpiredSize() { return queue_.size(); } - - private: - // Variables that define the policy for the queue - const inference::ModelQueuePolicy::TimeoutAction timeout_action_; - const uint64_t default_timeout_us_; - const bool allow_timeout_override_; - const uint32_t max_queue_size_; - - std::deque timeout_timestamp_ns_; - std::deque> queue_; - std::deque> delayed_queue_; - std::deque> rejected_queue_; - }; - using PriorityQueues = std::map; - - // Cursor for tracking pending batch, the cursor points to the item after - // the pending batch. - struct Cursor { - Cursor() = default; - Cursor(PriorityQueues::iterator start_it); - - Cursor(const Cursor& rhs) = default; - Cursor& operator=(const Cursor& rhs) = default; - - PriorityQueues::iterator curr_it_; - size_t queue_idx_; - bool at_delayed_queue_; - uint64_t pending_batch_closest_timeout_ns_; - uint64_t pending_batch_oldest_enqueue_time_ns_; - size_t pending_batch_count_; - bool valid_; - }; - - PriorityQueues queues_; - size_t size_; - - // Keep track of the priority level that the first request in the queue - // is at to avoid traversing 'queues_' - uint32_t front_priority_level_; - uint32_t last_priority_level_; - - Cursor pending_cursor_; - Cursor current_mark_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/sequence_batch_scheduler.cc b/3rdparty/core-r22.12/src/sequence_batch_scheduler.cc deleted file mode 100644 index 67f9ded8b8c1f2c3b54f38e21b12ad4898227a84..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/sequence_batch_scheduler.cc +++ /dev/null @@ -1,1687 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
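> Editorial note: the `PolicyQueue` above attaches a deadline to every enqueued request and, once the deadline passes, either delays or rejects it according to the model's queue policy. A stripped-down sketch of that mechanism follows; `TimeoutQueue` is hypothetical, ignores priority levels and batch-size accounting, and only sweeps from the front of the queue rather than from an arbitrary cursor index.

```cpp
#include <chrono>
#include <cstdint>
#include <deque>
#include <utility>

// Expired items are either delayed (run after unexpired work) or rejected
// outright, mirroring the REJECT/DELAY timeout_action of the deleted code.
enum class TimeoutAction { kReject, kDelay };

template <typename T>
struct TimeoutQueue {
  TimeoutAction action;
  std::deque<std::pair<T, uint64_t>> queue;  // item, deadline in ns (0 = none)
  std::deque<T> delayed;
  std::deque<T> rejected;

  void Enqueue(T item, uint64_t timeout_us) {
    uint64_t now_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                          std::chrono::steady_clock::now().time_since_epoch())
                          .count();
    uint64_t deadline = (timeout_us == 0) ? 0 : now_ns + timeout_us * 1000;
    queue.emplace_back(std::move(item), deadline);
  }

  // Move expired items off the front of the queue according to the policy.
  void ApplyPolicy() {
    uint64_t now_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                          std::chrono::steady_clock::now().time_since_epoch())
                          .count();
    while (!queue.empty() && queue.front().second != 0 &&
           now_ns > queue.front().second) {
      auto& dst = (action == TimeoutAction::kDelay) ? delayed : rejected;
      dst.push_back(std::move(queue.front().first));
      queue.pop_front();
    }
  }
};
```

The deleted implementation additionally honors a per-request timeout override (capped at the policy default) and applies the sweep from the pending-batch cursor, not only from the front.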
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "sequence_batch_scheduler.h" - -#ifndef _WIN32 -#include -#include -#include -#endif -#include -#include "constants.h" -#include "dynamic_batch_scheduler.h" -#include "model_config_utils.h" -#include "server.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -Status -SequenceBatchScheduler::Create( - TritonModel* model, - const std::unordered_map& enforce_equal_shape_tensors, - std::unique_ptr* scheduler) -{ - std::unique_ptr sched(new SequenceBatchScheduler()); - - // For debugging and testing, - const char* dstr = getenv("TRITONSERVER_BACKLOG_DELAY_SCHEDULER"); - sched->backlog_delay_cnt_ = 0; - if (dstr != nullptr) { - sched->backlog_delay_cnt_ = atoi(dstr); - LOG_INFO << "Delaying scheduler until " << sched->backlog_delay_cnt_ - << " backlog queued requests..."; - } - - auto instance_count = model->Instances().size(); - sched->queue_request_cnts_.resize(instance_count, 0); - - auto& config = model->Config(); - - // Max sequence idle... - sched->max_sequence_idle_microseconds_ = - config.sequence_batching().max_sequence_idle_microseconds(); - - sched->max_batch_size_ = config.max_batch_size(); - - // Implicit States - auto& states = config.sequence_batching().state(); - - for (const inference::ModelSequenceBatching_State& state : states) { - sched->state_output_config_map_.insert({state.output_name(), state}); - - if (state.initial_state_size() > 1) { - return Status( - Status::Code::INVALID_ARG, - std::string( - std::string("initial_state field for state input '") + - state.input_name() + - "' must contain exactly one or zero element. Found '" + - std::to_string(state.initial_state_size()) + "' elements.")); - } - - // If the model configuration has initial_state field. - if (state.initial_state_size() == 1) { - auto& initial_state = state.initial_state(0); - RETURN_IF_ERROR( - sched->GenerateInitialStateData(initial_state, state, model)); - } - } - - // Get the number of candidate sequence slots to allow for each - // runner. This is at least 1 even if the model doesn't support - // batching. - const size_t model_batch_size = std::max(1, config.max_batch_size()); - size_t seq_slot_cnt = model_batch_size; - if (config.sequence_batching().has_oldest()) { - seq_slot_cnt = - config.sequence_batching().oldest().max_candidate_sequences(); - } - - // Based on the model configuration create input tensors for control - // signals indicating sequence start, sequence continue, and - // sequence not ready. - std::shared_ptr start; - std::shared_ptr end; - std::shared_ptr startend; - std::shared_ptr cont; - std::shared_ptr notready; - RETURN_IF_ERROR(sched->CreateBooleanControlTensors( - config, &start, &end, &startend, &cont, ¬ready)); - - bool has_optional_input = false; - for (const auto& input : config.input()) { - if (input.optional()) { - has_optional_input = true; - break; - } - } - - // Create one SequenceBatch object for each requested runner. 
The - // SequenceBatch object has a thread that manages the batch of - // requests. - const auto& instances = model->Instances(); - uint32_t index = 0; - for (const auto& instance : instances) { - bool init_state; - std::unique_ptr sb; - - // Create the SequenceBatch derivative that handles the requested - // scheduling strategy. - if (config.sequence_batching().has_oldest()) { - sb.reset(new OldestSequenceBatch( - sched.get(), index, seq_slot_cnt, instance.get(), - enforce_equal_shape_tensors, has_optional_input, start, end, startend, - cont, notready, &init_state)); - } else { - sb.reset(new DirectSequenceBatch( - sched.get(), index, seq_slot_cnt, instance.get(), - enforce_equal_shape_tensors, has_optional_input, start, end, startend, - cont, notready, &init_state)); - } - - if (init_state) { - sched->batchers_.push_back(std::move(sb)); - // All sequence slots in the batcher are initially ready for a - // new sequence. - for (size_t b = 0; b < seq_slot_cnt; ++b) { - sched->ready_batcher_seq_slots_.push( - SequenceBatchScheduler::BatcherSequenceSlot(index, b)); - } - } - ++index; - } - if (sched->batchers_.empty()) { - return Status( - Status::Code::INTERNAL, - "Initialization failed for all sequence-batch scheduler threads"); - } - - // Create a reaper thread that watches for idle sequences. Run the - // reaper a lower priority. - SequenceBatchScheduler* raw = sched.release(); - - raw->reaper_thread_exit_ = false; - raw->reaper_thread_.reset( - new std::thread([raw]() { raw->ReaperThread(10 /* nice */); })); - - scheduler->reset(raw); - - return Status::Success; -} - -Status -SequenceBatchScheduler::GenerateInitialStateData( - const inference::ModelSequenceBatching_InitialState& initial_state, - const inference::ModelSequenceBatching_State& state, TritonModel* model) -{ - if (initial_state.data_type() != state.data_type()) { - return Status( - Status::Code::INVALID_ARG, - std::string("The data type used for 'initial_state' field of state '") + - state.input_name() + "' does not match the state data type."); - } - - if (initial_state.name().size() == 0) { - return Status( - Status::Code::INVALID_ARG, - std::string("Field 'name' must be set when using initial_state for " - "state input '") + - state.input_name() + "'."); - } - - auto initial_state_itr = initial_state_.find(state.input_name()); - if (initial_state_itr != initial_state_.end()) { - return Status( - Status::Code::INVALID_ARG, std::string("State input name '") + - state.input_name() + - "' specified more than once."); - } - - if (initial_state.dims().size() != state.dims().size()) { - return Status( - Status::Code::INVALID_ARG, - std::string( - "Number of dimensions in 'initial_state' doesn't match the size of" - " 'state' dimensions for state input '") + - state.input_name() + "'. " + - std::to_string(initial_state.dims().size()) + - " != " + std::to_string(state.dims().size())); - } - - // Check the dimensions to make sure it doesn't have variable-sized dims and - // matches the state description. 
- auto initial_state_dim = initial_state.dims().begin(); - auto state_dim = state.dims().begin(); - for (; initial_state_dim != initial_state.dims().end(); - initial_state_dim++, state_dim++) { - if (*initial_state_dim == -1) { - return Status( - Status::Code::INVALID_ARG, - std::string("'initial_state' field for state input name '") + - state.input_name() + "' contains variable dimensions."); - } else { - if (*state_dim != -1 && *initial_state_dim != *state_dim) { - return Status( - Status::Code::INVALID_ARG, - std::string("'initial_state' dim for input name '") + - state.input_name() + - "' doesn't match 'state' dim description. " + - std::to_string(*initial_state_dim) + - " != " + std::to_string(*state_dim)); - } - } - } - - const auto& initial_state_pair = initial_state_.emplace( - std::piecewise_construct, std::forward_as_tuple(state.input_name()), - std::forward_as_tuple(initial_state.name())); - auto& initial_state_data = initial_state_pair.first->second; - - // Calculate total memory byte size - auto element_count = triton::common::GetElementCount(initial_state.dims()); - size_t dtype_byte_size = - triton::common::GetDataTypeByteSize(initial_state.data_type()); - size_t total_byte_size = element_count * dtype_byte_size; - - // Custom handling for TYPE_BYTES - if (dtype_byte_size == 0) { - total_byte_size = sizeof(int32_t) * element_count; - } - - switch (initial_state.state_data_case()) { - case inference::ModelSequenceBatching_InitialState::StateDataCase:: - kZeroData: { - initial_state_data.data_ = std::make_shared( - total_byte_size, TRITONSERVER_MEMORY_CPU /* memory_type */, - 0 /* memory_type_id */); - - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - char* data_ptr = initial_state_data.data_->MutableBuffer( - &memory_type, &memory_type_id); - memset(data_ptr, 0, total_byte_size); - break; - } - case inference::ModelSequenceBatching_InitialState::StateDataCase:: - kDataFile: { - std::string file_input; - RETURN_IF_ERROR(ReadTextFile( - JoinPath({model->LocalizedModelPath(), kInitialStateFolder, - (initial_state.data_file())}), - &file_input)); - if (initial_state.data_type() == inference::DataType::TYPE_STRING) { - total_byte_size = file_input.size(); - } else if (total_byte_size > file_input.size()) { - return Status( - Status::Code::INVALID_ARG, - "initial_state setting expects " + std::to_string(total_byte_size) + - " bytes, but the data " - "provided from " + - initial_state.data_file() + "only has " + - std::to_string(file_input.size()) + " bytes."); - } - - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - - initial_state_data.data_ = std::make_shared( - total_byte_size, TRITONSERVER_MEMORY_CPU /* memory_type */, - 0 /* memory_type_id */); - char* data_ptr = initial_state_data.data_->MutableBuffer( - &memory_type, &memory_type_id); - memcpy(data_ptr, file_input.data(), total_byte_size); - - break; - } - default: - return Status( - Status::Code::INVALID_ARG, - std::string("initial_state setting expects state'") + - state.input_name() + "' to have state_data set"); - } - - return Status::Success; -} - -SequenceBatchScheduler::~SequenceBatchScheduler() -{ - // Signal the reaper thread to exit... 
- { - std::unique_lock lock(mu_); - reaper_thread_exit_ = true; - } - - reaper_cv_.notify_one(); - if ((reaper_thread_ != nullptr) && reaper_thread_->joinable()) { - reaper_thread_->join(); - } - - // Release 'batchers_' before other member variables because 'batchers_' - // can access 'this' and we need to make sure the member variables live - // longer than 'batchers_' - batchers_.clear(); -} - - -namespace { - -Status -GetBooleanOverrideInputs( - const std::string& tensor_name, const bool support_batching, - const inference::DataType tensor_datatype, const float fp32_false_value, - const float fp32_true_value, const int32_t int32_false_value, - const int32_t int32_true_value, const bool bool_false_value, - const bool bool_true_value, - std::shared_ptr* true_override, - std::shared_ptr* false_override) -{ - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - - const std::vector tensor_shape{1}; - std::vector tensor_shape_with_batch_dim{1}; - if (support_batching) { - tensor_shape_with_batch_dim.push_back(1); - } - const size_t size_p = triton::common::GetDataTypeByteSize(tensor_datatype); - - auto true_p = - std::make_shared(size_p, TRITONSERVER_MEMORY_CPU, 0); - char* true_p_ptr = true_p->MutableBuffer(&memory_type, &memory_type_id); - if ((true_p_ptr == nullptr) || - ((memory_type != TRITONSERVER_MEMORY_CPU) && - (memory_type != TRITONSERVER_MEMORY_CPU_PINNED)) || - (memory_type_id != 0)) { - return Status( - Status::Code::INTERNAL, - "failed to allocate sequence control signal in CPU memory"); - } - - auto false_p = - std::make_shared(size_p, TRITONSERVER_MEMORY_CPU, 0); - char* false_p_ptr = false_p->MutableBuffer(&memory_type, &memory_type_id); - if ((false_p_ptr == nullptr) || - ((memory_type != TRITONSERVER_MEMORY_CPU) && - (memory_type != TRITONSERVER_MEMORY_CPU_PINNED)) || - (memory_type_id != 0)) { - return Status( - Status::Code::INTERNAL, - "failed to allocate sequence control signal in CPU memory"); - } - - if (tensor_datatype == inference::DataType::TYPE_INT32) { - *(reinterpret_cast(true_p_ptr)) = int32_true_value; - *(reinterpret_cast(false_p_ptr)) = int32_false_value; - } else if (tensor_datatype == inference::DataType::TYPE_FP32) { - *(reinterpret_cast(true_p_ptr)) = fp32_true_value; - *(reinterpret_cast(false_p_ptr)) = fp32_false_value; - } else { - *(reinterpret_cast(true_p_ptr)) = bool_true_value; - *(reinterpret_cast(false_p_ptr)) = bool_false_value; - } - - auto ltrue_override = std::make_shared( - tensor_name, tensor_datatype, tensor_shape); - *ltrue_override->MutableShape() = ltrue_override->OriginalShape(); - *ltrue_override->MutableShapeWithBatchDim() = tensor_shape_with_batch_dim; - RETURN_IF_ERROR(ltrue_override->SetData(true_p)); - - auto lfalse_override = std::make_shared( - tensor_name, tensor_datatype, tensor_shape); - *lfalse_override->MutableShape() = lfalse_override->OriginalShape(); - *lfalse_override->MutableShapeWithBatchDim() = tensor_shape_with_batch_dim; - RETURN_IF_ERROR(lfalse_override->SetData(false_p)); - - *true_override = std::move(ltrue_override); - *false_override = std::move(lfalse_override); - - return Status::Success; -} - -} // namespace - -Status -SequenceBatchScheduler::CreateBooleanControlTensors( - const inference::ModelConfig& config, - std::shared_ptr* start_input_overrides, - std::shared_ptr* end_input_overrides, - std::shared_ptr* startend_input_overrides, - std::shared_ptr* continue_input_overrides, - std::shared_ptr* notready_input_overrides) -{ - // Currently only batch-size 1 requests are supported so only 
need - // to provide control vectors of that size. - *start_input_overrides = std::make_shared(); - *end_input_overrides = std::make_shared(); - *startend_input_overrides = std::make_shared(); - *continue_input_overrides = std::make_shared(); - *notready_input_overrides = std::make_shared(); - - std::string tensor_name; - inference::DataType tensor_datatype; - int32_t int32_false_value, int32_true_value; - float fp32_false_value, fp32_true_value; - bool bool_false_value, bool_true_value; - - // START, optional - { - RETURN_IF_ERROR(GetBooleanSequenceControlProperties( - config.sequence_batching(), config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_START, - false /* required */, &tensor_name, &tensor_datatype, &fp32_false_value, - &fp32_true_value, &int32_false_value, &int32_true_value, - &bool_false_value, &bool_true_value)); - if (!tensor_name.empty()) { - std::shared_ptr true_override; - std::shared_ptr false_override; - - RETURN_IF_ERROR(GetBooleanOverrideInputs( - tensor_name, config.max_batch_size() != 0, tensor_datatype, - fp32_false_value, fp32_true_value, int32_false_value, - int32_true_value, bool_false_value, bool_true_value, &true_override, - &false_override)); - - (*start_input_overrides)->emplace_back(true_override); - (*end_input_overrides)->emplace_back(false_override); - (*startend_input_overrides)->emplace_back(true_override); - (*continue_input_overrides)->emplace_back(false_override); - (*notready_input_overrides)->emplace_back(false_override); - } - } - - // END, optional - { - RETURN_IF_ERROR(GetBooleanSequenceControlProperties( - config.sequence_batching(), config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_END, - false /* required */, &tensor_name, &tensor_datatype, &fp32_false_value, - &fp32_true_value, &int32_false_value, &int32_true_value, - &bool_false_value, &bool_true_value)); - if (!tensor_name.empty()) { - std::shared_ptr true_override; - std::shared_ptr false_override; - - RETURN_IF_ERROR(GetBooleanOverrideInputs( - tensor_name, config.max_batch_size() != 0, tensor_datatype, - fp32_false_value, fp32_true_value, int32_false_value, - int32_true_value, bool_false_value, bool_true_value, &true_override, - &false_override)); - - (*start_input_overrides)->emplace_back(false_override); - (*end_input_overrides)->emplace_back(true_override); - (*startend_input_overrides)->emplace_back(true_override); - (*continue_input_overrides)->emplace_back(false_override); - (*notready_input_overrides)->emplace_back(false_override); - } - } - - // READY, optional - { - RETURN_IF_ERROR(GetBooleanSequenceControlProperties( - config.sequence_batching(), config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_READY, - false /* required */, &tensor_name, &tensor_datatype, &fp32_false_value, - &fp32_true_value, &int32_false_value, &int32_true_value, - &bool_false_value, &bool_true_value)); - if (!tensor_name.empty()) { - std::shared_ptr true_override; - std::shared_ptr false_override; - - RETURN_IF_ERROR(GetBooleanOverrideInputs( - tensor_name, config.max_batch_size() != 0, tensor_datatype, - fp32_false_value, fp32_true_value, int32_false_value, - int32_true_value, bool_false_value, bool_true_value, &true_override, - &false_override)); - - (*start_input_overrides)->emplace_back(true_override); - (*end_input_overrides)->emplace_back(true_override); - (*startend_input_overrides)->emplace_back(true_override); - (*continue_input_overrides)->emplace_back(true_override); - 
(*notready_input_overrides)->emplace_back(false_override); - } - } - - return Status::Success; -} - -Status -SequenceBatchScheduler::Enqueue(std::unique_ptr& irequest) -{ - // Queue timer starts at the beginning of the queueing and - // scheduling process - irequest->CaptureQueueStartNs(); - INFER_TRACE_ACTIVITY( - irequest->Trace(), TRITONSERVER_TRACE_QUEUE_START, - irequest->QueueStartNs()); - - // Record time at the beginning of the batcher queueing - irequest->CaptureBatcherStartNs(); - - // For now the request must have batch-size 1 since the sequence - // batcher does not yet support requests that are statically - // batched. - if (irequest->BatchSize() > 1) { - return Status( - Status::Code::INVALID_ARG, - "inference request to model '" + irequest->ModelName() + - "' must specify batch-size 1 due to requirements of sequence " - "batcher"); - } - - // A request must have a correlation ID to be processed correctly by - // this scheduler. A value of 0 (zero) or "" (empty) indicates that the - // request doesn't have a correlation ID. - const InferenceRequest::SequenceId& correlation_id = - irequest->CorrelationId(); - if (!correlation_id.InSequence()) { - return Status( - Status::Code::INVALID_ARG, - "inference request to model '" + irequest->ModelName() + - "' must specify a non-zero or non-empty correlation ID"); - } - - BatcherSequenceSlot* target = nullptr; - - const bool seq_start = - ((irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_START) != 0); - const bool seq_end = - ((irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END) != 0); - - // Check if the request is one of the in-flight sequence (not starting new - // sequence), we consider sequences in backlog as also in-flight. - if (stop_ && seq_start) { - return Status( - Status::Code::UNAVAILABLE, - "Server is stopping, scheduler for model has stopped accepting new " - "inference requests"); - } - - std::unique_lock lock(mu_); - - auto sb_itr = sequence_to_batcherseqslot_map_.find(correlation_id); - auto bl_itr = sequence_to_backlog_map_.find(correlation_id); - - // If this request is not starting a new sequence its correlation ID - // should already be known with a target in either a sequence slot - // or in the backlog. If it doesn't then the sequence wasn't started - // correctly or there has been a correlation ID conflict. In either - // case fail this request. - if (!seq_start && (sb_itr == sequence_to_batcherseqslot_map_.end()) && - (bl_itr == sequence_to_backlog_map_.end())) { - std::string correlation_id_str{""}; - if (correlation_id.Type() == - InferenceRequest::SequenceId::DataType::STRING) { - correlation_id_str = correlation_id.StringValue(); - } else if ( - correlation_id.Type() == - InferenceRequest::SequenceId::DataType::UINT64) { - correlation_id_str = std::to_string(correlation_id.UnsignedIntValue()); - } - return Status( - Status::Code::INVALID_ARG, - "inference request for sequence " + correlation_id_str + " to model '" + - irequest->ModelName() + - "' must specify the START flag on the first request of the " - "sequence"); - } - - // Record the timestamp of this request for the correlation ID. The - // reaper thread will check to make sure that - // max_sequence_idle_microseconds value is not exceed for any - // sequence, and if it is it will release the sequence slot (if any) - // allocated to that sequence. 
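As the comment above describes, every enqueue stamps the request's correlation ID and a reaper thread later force-ends sequences that stay idle past `max_sequence_idle_microseconds`. The sketch below is an illustrative, standalone distillation of that bookkeeping, not code from the deleted source; it uses simplified `uint64_t` correlation IDs and hypothetical helper names (`Touch`, `CollectIdle`) instead of the scheduler's real types.

```cpp
// Toy model of the idle-sequence bookkeeping: map each correlation ID to the
// timestamp of its most recent request, and periodically collect the ones
// that have been idle too long.
#include <chrono>
#include <cstdint>
#include <unordered_map>
#include <vector>

using CorrIdTimestamps = std::unordered_map<uint64_t, uint64_t>;

uint64_t NowMicroseconds()
{
  return std::chrono::duration_cast<std::chrono::microseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}

// Called on every enqueue for the sequence's correlation ID.
void Touch(CorrIdTimestamps& timestamps, uint64_t correlation_id)
{
  timestamps[correlation_id] = NowMicroseconds();
}

// Called periodically by a reaper; returns the correlation IDs whose
// sequences exceeded 'max_idle_us' and forgets them.
std::vector<uint64_t> CollectIdle(CorrIdTimestamps& timestamps, uint64_t max_idle_us)
{
  std::vector<uint64_t> idle;
  const uint64_t now_us = NowMicroseconds();
  for (auto it = timestamps.begin(); it != timestamps.end();) {
    if ((now_us - it->second) >= max_idle_us) {
      idle.push_back(it->first);
      it = timestamps.erase(it);
    } else {
      ++it;
    }
  }
  return idle;
}
```

The real scheduler keys the map by `InferenceRequest::SequenceId` and treats backlogged sequences differently (it only extends their timeout), but the core contract is the same.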
- { - uint64_t now_us = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - correlation_id_timestamps_[correlation_id] = now_us; - } - - // If this request starts a new sequence but the correlation ID - // already has an in-progress sequence then that previous sequence - // did not end correctly, or there is a correlation ID conflict. In - // this case we continue the new sequence (in either backlog or - // sequence slot). It is ok for a backlog/slot to have multiple - // starts... as long as it has a single end. The previous sequence - // that was not correctly ended will have its existing requests - // handled and then the new sequence will start. - if (seq_start && ((sb_itr != sequence_to_batcherseqslot_map_.end()) || - (bl_itr != sequence_to_backlog_map_.end()))) { - LOG_WARNING - << "sequence " << correlation_id << " for model '" - << irequest->ModelName() - << "' has a conflict. The previous sequence did not end before this " - "sequence start. Previous sequence will be terminated early."; - } - - // This request already has an assigned slot... - if (sb_itr != sequence_to_batcherseqslot_map_.end()) { - target = &sb_itr->second; - } - // This request already has a queue in the backlog... - else if (bl_itr != sequence_to_backlog_map_.end()) { - LOG_VERBOSE(1) << "Enqueuing CORRID " << correlation_id - << " into existing backlog: " << irequest->ModelName(); - - bl_itr->second->emplace_back(std::move(irequest)); - - // If the sequence is ending then forget correlation ID - // connection to this backlog queue. If another sequence starts - // with the same correlation ID it will be collected in another - // backlog queue. - if (seq_end) { - sequence_to_backlog_map_.erase(bl_itr); - } - return Status::Success; - } - // This request does not have an assigned backlog or sequence - // slot. By the above checks it must be starting. If there is a free - // sequence slot available then assign this sequence to that slot... - else if (!ready_batcher_seq_slots_.empty()) { - target = &sequence_to_batcherseqslot_map_[correlation_id]; - *target = ready_batcher_seq_slots_.top(); - ready_batcher_seq_slots_.pop(); - } - // Last option is to assign this request to the backlog... - else { - LOG_VERBOSE(1) << "Enqueuing CORRID " << correlation_id - << " into new backlog: " << irequest->ModelName(); - - auto backlog = - std::make_shared>>(); - backlog_queues_.push_back(backlog); - backlog->emplace_back(std::move(irequest)); - if (!seq_end) { - sequence_to_backlog_map_[correlation_id] = std::move(backlog); - } - return Status::Success; - } - - // Need to grab the target contents before the erase below since - // that can free it. - const size_t batcher_idx = target->batcher_idx_; - const uint32_t seq_slot = target->seq_slot_; - - // At this point the request has been assigned to a sequence - // slot. If the sequence is ending then stop tracking the - // correlation. - if (seq_end) { - sequence_to_batcherseqslot_map_.erase(correlation_id); - } - - // Enqueue request into batcher and sequence slot. Don't hold the - // lock while enqueuing in a specific batcher. 
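The routing performed above reduces to four outcomes depending on whether the correlation ID already owns a sequence slot, already owns a backlog queue, or is starting fresh with or without a free slot. The following sketch is a simplified, standalone model of that decision (hypothetical names, no locking), not the scheduler's actual code.

```cpp
// Toy model of request routing: existing slot, existing backlog, claim a free
// slot, or open a new backlog queue.
#include <cstddef>
#include <cstdint>
#include <unordered_map>

struct Slot {
  std::size_t batcher_idx;
  uint32_t seq_slot;
};

enum class Route { kExistingSlot, kExistingBacklog, kNewSlot, kNewBacklog };

Route RouteRequest(
    uint64_t correlation_id,
    const std::unordered_map<uint64_t, Slot>& corrid_to_slot,
    const std::unordered_map<uint64_t, std::size_t>& corrid_to_backlog,
    bool free_slot_available)
{
  if (corrid_to_slot.count(correlation_id) != 0) {
    return Route::kExistingSlot;  // sequence already assigned to a batcher slot
  }
  if (corrid_to_backlog.count(correlation_id) != 0) {
    return Route::kExistingBacklog;  // sequence already queued in the backlog
  }
  if (free_slot_available) {
    return Route::kNewSlot;  // claim a ready slot for the starting sequence
  }
  return Route::kNewBacklog;  // no slot free, start a new backlog queue
}
```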
- lock.unlock(); - - LOG_VERBOSE(1) << "Enqueuing CORRID " << correlation_id << " into batcher " - << batcher_idx << ", sequence slot " << seq_slot << ": " - << irequest->ModelName(); - - batchers_[batcher_idx]->Enqueue(seq_slot, correlation_id, irequest); - - return Status::Success; -} - -InferenceRequest::SequenceId -SequenceBatchScheduler::ReleaseSequenceSlot( - const BatcherSequenceSlot& batcher_seq_slot, - std::deque>* requests) -{ - std::unique_lock lock(mu_); - - // If there is a backlogged sequence and it is requested, return it - // so that it can use the newly available sequence slot. - if (!backlog_queues_.empty()) { - auto& backlog = backlog_queues_.front(); - *requests = std::move(*backlog); - backlog_queues_.pop_front(); - if (!requests->empty()) { // should never be empty... - const auto& irequest = requests->back(); - const InferenceRequest::SequenceId& correlation_id = - irequest->CorrelationId(); - - // If the last queue entry is not an END request then the entire - // sequence is not contained in the backlog. In that case must - // update backlog and batcherseqslot maps so that future - // requests get directed to the batcher sequence-slot instead of - // the backlog. - const bool seq_end = - ((irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END) != 0); - if (!seq_end) { - // Since the correlation ID is being actively collected in the - // backlog, there should not be any in-flight sequences with - // that same correlation ID that have an assigned slot. - if (sequence_to_batcherseqslot_map_.find(correlation_id) != - sequence_to_batcherseqslot_map_.end()) { - LOG_ERROR << irequest->LogRequest() << "internal: backlog sequence " - << correlation_id - << " conflicts with in-flight sequence for model '" - << irequest->ModelName() << "'"; - } - - sequence_to_backlog_map_.erase(correlation_id); - sequence_to_batcherseqslot_map_[correlation_id] = batcher_seq_slot; - } - - LOG_VERBOSE(1) << irequest->LogRequest() << "CORRID " << correlation_id - << " reusing batcher " << batcher_seq_slot.batcher_idx_ - << ", slot " << batcher_seq_slot.seq_slot_ << ": " - << irequest->ModelName(); - return correlation_id; - } - } - - // There is no backlogged sequence so just release the batch slot - LOG_VERBOSE(1) << "Freeing slot in batcher " << batcher_seq_slot.batcher_idx_ - << ", slot " << batcher_seq_slot.seq_slot_; - - ready_batcher_seq_slots_.push(batcher_seq_slot); - return InferenceRequest::SequenceId(); -} - -bool -SequenceBatchScheduler::DelayScheduler( - const uint32_t batcher_idx, const size_t cnt, const size_t total) -{ - std::unique_lock lock(mu_); - queue_request_cnts_[batcher_idx] = cnt; - - size_t seen = 0; - for (auto c : queue_request_cnts_) { - seen += c; - } - - if (seen < total) { - return true; - } - - if (backlog_delay_cnt_ > 0) { - size_t backlog_seen = 0; - for (const auto& q : backlog_queues_) { - backlog_seen += q->size(); - } - - if (backlog_seen < backlog_delay_cnt_) { - return true; - } - } - - return false; -} - -void -SequenceBatchScheduler::ReaperThread(const int nice) -{ -#ifndef _WIN32 - if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) { - LOG_VERBOSE(1) << "Starting sequence-batch reaper thread at nice " << nice - << "..."; - } else { - LOG_VERBOSE(1) << "Starting sequence-batch reaper thread at default nice " - "(requested nice " - << nice << " failed)..."; - } -#else - LOG_VERBOSE(1) << "Starting sequence-batch reaper thread at default nice..."; -#endif - - const uint64_t backlog_idle_wait_microseconds = 50 * 1000; - - while 
(!reaper_thread_exit_) { - uint64_t wait_microseconds = max_sequence_idle_microseconds_; - BatcherSequenceSlotMap force_end_sequences; - - { - std::unique_lock lock(mu_); - - uint64_t now_us = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - - for (auto cid_itr = correlation_id_timestamps_.cbegin(); - cid_itr != correlation_id_timestamps_.cend();) { - int64_t remaining_microseconds = - (int64_t)max_sequence_idle_microseconds_ - - (now_us - cid_itr->second); - if (remaining_microseconds > 0) { - wait_microseconds = - std::min(wait_microseconds, (uint64_t)remaining_microseconds + 1); - ++cid_itr; - continue; - } - - const InferenceRequest::SequenceId& idle_correlation_id = - cid_itr->first; - LOG_VERBOSE(1) << "Reaper: CORRID " << idle_correlation_id - << ": max sequence idle exceeded"; - - auto idle_sb_itr = - sequence_to_batcherseqslot_map_.find(idle_correlation_id); - - // If the idle correlation ID has an assigned sequence slot, - // then release that assignment so it becomes available for - // another sequence. Release is done by enqueuing and must be - // done outside the lock, so just collect needed info here. - if (idle_sb_itr != sequence_to_batcherseqslot_map_.end()) { - force_end_sequences[idle_correlation_id] = idle_sb_itr->second; - - sequence_to_batcherseqslot_map_.erase(idle_correlation_id); - cid_itr = correlation_id_timestamps_.erase(cid_itr); - } else { - // If the idle correlation ID is in the backlog, then just - // need to increase the timeout so that we revisit it again in - // the future to check if it is assigned to a sequence slot. - auto idle_bl_itr = sequence_to_backlog_map_.find(idle_correlation_id); - if (idle_bl_itr != sequence_to_backlog_map_.end()) { - LOG_VERBOSE(1) << "Reaper: found idle CORRID " - << idle_correlation_id; - wait_microseconds = - std::min(wait_microseconds, backlog_idle_wait_microseconds); - ++cid_itr; - } else { - LOG_VERBOSE(1) << "Reaper: ignoring stale idle CORRID " - << idle_correlation_id; - cid_itr = correlation_id_timestamps_.erase(cid_itr); - } - } - } - } - - // Enqueue force-ends outside of the lock. - for (const auto& pr : force_end_sequences) { - const InferenceRequest::SequenceId& idle_correlation_id = pr.first; - const size_t batcher_idx = pr.second.batcher_idx_; - const uint32_t seq_slot = pr.second.seq_slot_; - - LOG_VERBOSE(1) << "Reaper: force-ending CORRID " << idle_correlation_id - << " in batcher " << batcher_idx << ", slot " << seq_slot; - - // A slot assignment is released by enqueuing a request with a - // null request. The scheduler thread will interpret the null - // request as meaning it should release the sequence slot but - // otherwise do nothing with the request. 
- std::unique_ptr null_request; - batchers_[batcher_idx]->Enqueue( - seq_slot, idle_correlation_id, null_request); - } - - // Wait until the next idle timeout needs to be checked - if (wait_microseconds > 0) { - std::unique_lock lock(mu_); - LOG_VERBOSE(2) << "Reaper: sleeping for " << wait_microseconds << "us..."; - std::chrono::microseconds wait_timeout(wait_microseconds); - reaper_cv_.wait_for(lock, wait_timeout); - } - } - - LOG_VERBOSE(1) << "Stopping sequence-batch reaper thread..."; -} - -SequenceBatch::SequenceBatch( - SequenceBatchScheduler* base, const uint32_t batcher_idx, - const size_t seq_slot_cnt, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input, - const std::shared_ptr& - start_input_overrides, - const std::shared_ptr& - end_input_overrides, - const std::shared_ptr& - startend_input_overrides, - const std::shared_ptr& - continue_input_overrides, - const std::shared_ptr& - notready_input_overrides) - : base_(base), batcher_idx_(batcher_idx), seq_slot_cnt_(seq_slot_cnt), - enforce_equal_shape_tensors_(enforce_equal_shape_tensors), - has_optional_input_(has_optional_input), - start_input_overrides_(start_input_overrides), - end_input_overrides_(end_input_overrides), - startend_input_overrides_(startend_input_overrides), - continue_input_overrides_(continue_input_overrides), - notready_input_overrides_(notready_input_overrides), - sequence_states_(seq_slot_cnt) -{ -} - -bool -SequenceBatch::CreateCorrelationIDControl(const inference::ModelConfig& config) -{ - // If model wants CORRID control then get the name of the input - // tensor and initialize the override structure for each sequence - // slot that is used to communicate the correlation ID. - std::string correlation_id_tensor_name; - inference::DataType correlation_id_datatype; - Status corrid_status = GetTypedSequenceControlProperties( - config.sequence_batching(), config.name(), - inference::ModelSequenceBatching::Control::CONTROL_SEQUENCE_CORRID, - false /* required */, &correlation_id_tensor_name, - &correlation_id_datatype); - if (!corrid_status.IsOk()) { - LOG_ERROR << "failed validating CORRID control for sequence-batch " - "scheduler thread " - << batcher_idx_ << ": " << corrid_status.Message(); - return false; - } - - if (!correlation_id_tensor_name.empty()) { - if ((correlation_id_datatype != inference::DataType::TYPE_UINT64) && - (correlation_id_datatype != inference::DataType::TYPE_INT64) && - (correlation_id_datatype != inference::DataType::TYPE_UINT32) && - (correlation_id_datatype != inference::DataType::TYPE_INT32) && - (correlation_id_datatype != inference::DataType::TYPE_STRING)) { - LOG_ERROR << "unexpected control data type, expected TYPE_UINT64, " - "TYPE_INT64, TYPE_UINT32, TYPE_INT32, or TYPE_STRING for " - << inference::ModelSequenceBatching_Control_Kind_Name( - inference::ModelSequenceBatching::Control:: - CONTROL_SEQUENCE_CORRID) - << " for " << config.name(); - return false; - } - - const std::vector tensor_shape{1}; - std::vector tensor_shape_with_batch_dim{1}; - if (config.max_batch_size() != 0) { - tensor_shape_with_batch_dim.push_back(1); - } - - auto override = std::make_shared( - correlation_id_tensor_name, correlation_id_datatype, tensor_shape); - *override->MutableShape() = override->OriginalShape(); - *override->MutableShapeWithBatchDim() = tensor_shape_with_batch_dim; - - seq_slot_corrid_override_ = std::move(override); - } - - return true; -} - -void -SequenceBatch::SetControlTensors( - std::unique_ptr& irequest, const int32_t seq_slot, - const 
InferenceRequest::SequenceId& corrid, const bool not_ready) -{ - const SequenceBatchScheduler::ControlInputs* controls; - - // Set the start, end, and ready control tensors appropriately... - if (not_ready) { - controls = notready_input_overrides_.get(); - } else if ( - (irequest->Flags() & (TRITONSERVER_REQUEST_FLAG_SEQUENCE_START | - TRITONSERVER_REQUEST_FLAG_SEQUENCE_END)) == - (TRITONSERVER_REQUEST_FLAG_SEQUENCE_START | - TRITONSERVER_REQUEST_FLAG_SEQUENCE_END)) { - controls = startend_input_overrides_.get(); - } else if ( - (irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_START) != 0) { - controls = start_input_overrides_.get(); - } else if ( - (irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END) != 0) { - controls = end_input_overrides_.get(); - } else { - controls = continue_input_overrides_.get(); - } - - for (const auto& control : *controls) { - irequest->AddOverrideInput(control); - } - - // Set correlation ID control tensor if requested by the model. - if (seq_slot_corrid_override_ != nullptr) { - auto& seq_corr_id = seq_slot_corrid_override_; - size_t size_p = triton::common::GetDataTypeByteSize(seq_corr_id->DType()); - if (seq_corr_id->DType() == inference::DataType::TYPE_STRING) { - // 4 bytes for length of string plus pre-defined max string correlation id - // length in bytes - size_p = 4 + triton::core::STRING_CORRELATION_ID_MAX_LENGTH_BYTES; - } - - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - auto corrid_p = - std::make_shared(size_p, TRITONSERVER_MEMORY_CPU, 0); - char* corrid_p_ptr = corrid_p->MutableBuffer(&memory_type, &memory_type_id); - if ((corrid_p_ptr == nullptr) || - ((memory_type != TRITONSERVER_MEMORY_CPU) && - (memory_type != TRITONSERVER_MEMORY_CPU_PINNED)) || - (memory_type_id != 0)) { - LOG_ERROR << "failed to allocate sequence CORRID control signal in CPU " - "memory"; - return; - } - - auto override = std::make_shared( - seq_corr_id->Name(), seq_corr_id->DType(), seq_corr_id->Shape()); - *override->MutableShape() = override->OriginalShape(); - *override->MutableShapeWithBatchDim() = seq_corr_id->ShapeWithBatchDim(); - Status corrid_status = override->SetData(corrid_p); - if (!corrid_status.IsOk()) { - LOG_ERROR << "failed creating CORRID control for sequence-batch " - "scheduler thread " - << batcher_idx_ << " for " << seq_corr_id->Name(); - return; - } - - if (corrid.Type() == InferenceRequest::SequenceId::DataType::STRING) { - std::string correlation_id = corrid.StringValue(); - uint32_t correlation_id_length = correlation_id.length(); - memcpy(corrid_p_ptr, &correlation_id_length, sizeof(uint32_t)); - memcpy( - corrid_p_ptr + sizeof(uint32_t), correlation_id.c_str(), - correlation_id_length); - } else if ( - corrid.Type() == InferenceRequest::SequenceId::DataType::UINT64) { - uint64_t correlation_id = corrid.UnsignedIntValue(); - const char* corrid_ptr = reinterpret_cast(&correlation_id); - memcpy(corrid_p_ptr, corrid_ptr, size_p); - } - irequest->AddOverrideInput(override); - } -} - -void -SequenceBatch::UpdateImplicitState( - std::unique_ptr& irequest, const int32_t seq_slot) -{ - // This should be executed only if the model has a states section. - if (!base_->StateOutputConfigMap().empty()) { - auto& sequence_states = sequence_states_[seq_slot]; - - // Initialize the input state if the sequence is starting. - if ((irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_START) != 0) { - sequence_states = nullptr; - } - - // Create the state for the first request in the sequence. 
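The implicit-state handling just below follows a simple rule: a START flag discards whatever state the slot held for the previous sequence, and the state object is then lazily created for the first request of the new sequence. The sketch that follows is an illustrative stand-in (a hypothetical `State` struct replaces the real `SequenceStates`), not the deleted implementation.

```cpp
// Toy model of per-slot implicit state: reset on sequence start, create
// lazily for the first request, then reuse for the rest of the sequence.
#include <cstdint>
#include <memory>
#include <vector>

struct State { /* model-defined state tensors would live here */ };

void PrepareSlotState(
    std::vector<std::shared_ptr<State>>& slot_states, int32_t seq_slot,
    bool sequence_start)
{
  auto& state = slot_states[seq_slot];
  if (sequence_start) {
    state.reset();  // forget state left over from the previous sequence
  }
  if (state == nullptr) {
    state = std::make_shared<State>();  // first request of this sequence
  }
  // the request would then be pointed at 'state' for its input/output states
}
```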
- if (sequence_states == nullptr) { - sequence_states.reset(new SequenceStates); - sequence_states->Initialize( - base_->StateOutputConfigMap(), base_->MaxBatchSize(), - base_->InitialState()); - } - - irequest->SetSequenceStates(sequence_states); - } -} - -DirectSequenceBatch::DirectSequenceBatch( - SequenceBatchScheduler* base, const uint32_t batcher_idx, - const size_t seq_slot_cnt, TritonModelInstance* model_instance, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input, - const std::shared_ptr& - start_input_overrides, - const std::shared_ptr& - end_input_overrides, - const std::shared_ptr& - startend_input_overrides, - const std::shared_ptr& - continue_input_overrides, - const std::shared_ptr& - notready_input_overrides, - bool* is_initialized) - : SequenceBatch( - base, batcher_idx, seq_slot_cnt, enforce_equal_shape_tensors, - has_optional_input, start_input_overrides, end_input_overrides, - startend_input_overrides, continue_input_overrides, - notready_input_overrides), - model_instance_(model_instance), scheduler_thread_exit_(false), - scheduler_idle_(false), queues_(seq_slot_cnt), - seq_slot_correlation_ids_(seq_slot_cnt, 0), max_active_seq_slot_(-1) -{ - // Initialize to handle CORRID control. If error just exit - // now... that means the corresponding model instance will not have - // any runner and so will not get used for execution. - const auto& config = model_instance_->Model()->Config(); - if (!CreateCorrelationIDControl(config)) { - *is_initialized = false; - return; - } - - max_batch_size_ = ((size_t)std::max(1, config.max_batch_size())); - minimum_slot_utilization_ = - config.sequence_batching().direct().minimum_slot_utilization(); - pending_batch_delay_ns_ = - config.sequence_batching().direct().max_queue_delay_microseconds() * 1000; - - // Create a scheduler thread associated with 'batcher_idx' that - // executes the queued requests. - const int nice = 0; - NewPayload(); - scheduler_thread_.reset( - new std::thread([this, nice]() { BatcherThread(nice); })); - - *is_initialized = true; -} - -DirectSequenceBatch::~DirectSequenceBatch() -{ - // Signal the scheduler thread to exit... - { - std::unique_lock lock(mu_); - scheduler_thread_exit_ = true; - } - - cv_.notify_one(); - - // It is possible for the scheduler thread to be the last holder of - // a model object, and when that scheduler thread releases the - // object the scheduler thread itself will destroy this - // SequenceBatch object. So we need to check to make sure the - // scheduler thread does not join it against itself and instead - // detach it so there is not a problem when its thread object is - // destroyed. - if (scheduler_thread_->joinable()) { - scheduler_thread_->join(); - } -} - -void -DirectSequenceBatch::Enqueue( - const uint32_t seq_slot, const InferenceRequest::SequenceId& correlation_id, - std::unique_ptr& request) -{ - bool wake_runner = false; - - { - std::lock_guard lock(mu_); - - queues_[seq_slot].emplace_back(std::move(request)); - - seq_slot_correlation_ids_[seq_slot] = correlation_id; - max_active_seq_slot_ = - std::max(max_active_seq_slot_, static_cast(seq_slot)); - - // If runner is idle then wake it to service this request. 
We do - // the actual wake outside of the lock to avoid having the woken - // thread immediately block on the lock - wake_runner = scheduler_idle_; - } - - if (wake_runner) { - cv_.notify_one(); - } -} - -void -DirectSequenceBatch::NewPayload() -{ - curr_payload_ = - model_instance_->Model()->Server()->GetRateLimiter()->GetPayload( - Payload::Operation::INFER_RUN, model_instance_); -} - -void -DirectSequenceBatch::BatcherThread(const int nice) -{ -#ifndef _WIN32 - if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) { - LOG_VERBOSE(1) << "Starting Direct sequence-batch scheduler thread " - << batcher_idx_ << " at nice " << nice << "..."; - } else { - LOG_VERBOSE(1) << "Starting Direct sequence-batch scheduler thread " - << batcher_idx_ << " at default nice (requested nice " - << nice << " failed)..."; - } -#else - LOG_VERBOSE(1) << "Starting Direct sequence-batch scheduler thread " - << batcher_idx_ << " at default nice..."; -#endif - - // For debugging and testing, delay start of thread until queues - // contain the specified number of entries (across all - // SequenceBatchs in the scheduler). - const char* dstr = getenv("TRITONSERVER_DELAY_SCHEDULER"); - size_t delay_cnt = 0; - if (dstr != nullptr) { - delay_cnt = atoi(dstr); - LOG_VERBOSE(1) << "Delaying scheduler thread " << batcher_idx_ << " until " - << delay_cnt << " queued requests..."; - } - - const uint64_t default_wait_microseconds = 500 * 1000; - exec_complete_ = true; - - // When there is optional input or input shape must be enforced, - // the inputs in the requests must be examined for forming a batch - const bool check_input = - !enforce_equal_shape_tensors_.empty() || has_optional_input_; - while (!scheduler_thread_exit_) { - uint64_t wait_microseconds = default_wait_microseconds; - - // Wait till execution of the last enqueued payload is - // complete. - { - std::unique_lock lk(payload_mu_); - payload_cv_.wait(lk, [this] { return exec_complete_; }); - } - - // Hold the lock for as short a time as possible. - { - std::unique_lock lock(mu_); - - if (delay_cnt > 0) { - wait_microseconds = 10 * 1000; - // Debugging/testing... wait until queues together contain at - // least 'delay_cnt' items... - size_t total_size = 0; - for (const auto& q : queues_) { - total_size += q.size(); - } - if (!base_->DelayScheduler(batcher_idx_, total_size, delay_cnt)) { - delay_cnt = 0; - } - LOG_VERBOSE(1) << "Delaying scheduler thread " << batcher_idx_ - << " until " << delay_cnt - << " queued requests, current total = " << total_size; - } else { - RequiredEqualInputs required_equal_inputs; - InferenceRequest* null_irequest = nullptr; - - // Make one pass through the active slots to: - // - // 1) release any slots that have forcibly ended sequences - // - // 2) find a representative request that will provide: - // - // a) the shape, type, etc. information for null requests - // - // b) the required tensor shapes for the batch for the - // case where ragged batching is not allowed - // - // 3) Determine the earliest enqueue time and number of ready - // sequences if queue delay is enabled - // - int32_t max_seq_slot = -1; - uint64_t earliest_enqueue_time_ns = UINT64_MAX; - size_t ready_cnt = 0; - for (int32_t seq_slot = 0; seq_slot <= max_active_seq_slot_; - ++seq_slot) { - std::deque>& queue = - queues_[seq_slot]; - if (!queue.empty()) { - // If the request is nullptr then the sequence in the slot - // has timed-out so release the slot for another sequence - // from the backlog. 
- if (queue.front() == nullptr) { - queue.pop_front(); - - SequenceBatchScheduler::BatcherSequenceSlot batcher_seq_slot( - batcher_idx_, seq_slot); - seq_slot_correlation_ids_[seq_slot] = - base_->ReleaseSequenceSlot(batcher_seq_slot, &queue); - } - } - - // Need to check queue again for contents since if released - // above it may now be empty... - if (!queue.empty()) { - // For NULL requests need an InferenceRequest that can be - // batched but has controls set to "not ready". Any - // request can serve this purpose so grab a copy of the - // first one. This first request is also used to - // initialize 'required_equal_inputs' so we are sure that - // this null request will have the correct shape for any - // created batch. - if (null_irequest == nullptr) { - null_irequest = queue.front().get(); - UpdateImplicitState(queue.front(), seq_slot); - } - - // If this is the first non-null request capture the shape - // of the tensors that don't support ragged so we can - // compare them to later requests. - if (!required_equal_inputs.Initialized() && check_input) { - Status status = required_equal_inputs.Initialize( - queue.front(), enforce_equal_shape_tensors_, - has_optional_input_); - if (!status.IsOk()) { - LOG_ERROR - << "internal: unexpecting failure initializing shape: " - << status.Message(); - } - } - - earliest_enqueue_time_ns = std::min( - earliest_enqueue_time_ns, queue.front()->BatcherStartNs()); - ready_cnt++; - max_seq_slot = seq_slot; - } - } - - if (max_seq_slot != -1) { - if ((pending_batch_delay_ns_ == 0) || - (minimum_slot_utilization_ == 0.0)) { - wait_microseconds = 0; - } else { - // Compare the age of the oldest pending request to the maximum - // batch queuing delay, and the size of the ready requests in the - // batch, execute now if queuing delay is exceeded or the batch - // size is large enough. Otherwise create a timer to wakeup a - // thread to check again at the maximum allowed delay. - uint64_t now_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - uint64_t current_batch_delay_ns = - (now_ns - earliest_enqueue_time_ns); - if ((current_batch_delay_ns > pending_batch_delay_ns_) || - (((float)ready_cnt) / max_batch_size_ >= - minimum_slot_utilization_)) { - wait_microseconds = 0; - LOG_VERBOSE(1) - << "start sequence batch execution. " - << "current batch delay: " << current_batch_delay_ns - << "; maximum delay allowed: " << pending_batch_delay_ns_ - << "slot utilization: " << ready_cnt << "/" << max_batch_size_ - << "; utilization threshold: " << minimum_slot_utilization_; - } else { - wait_microseconds = - (pending_batch_delay_ns_ - current_batch_delay_ns) / 1000; - // reset 'max_seq_slot' so that not request is pulled from the - // queues - max_seq_slot = -1; - LOG_VERBOSE(1) - << "defer sequence batch execution. " - << "current batch delay: " << current_batch_delay_ns - << "; maximum delay allowed: " << pending_batch_delay_ns_ - << "slot utilization: " << ready_cnt << "/" << max_batch_size_ - << "; utilization threshold: " << minimum_slot_utilization_; - } - } - } - - // Collect requests from slot 0 to max_seq_slot. - for (int32_t seq_slot = 0; seq_slot <= max_seq_slot; ++seq_slot) { - bool end_of_sequence = false; - bool use_null_request = false; - std::deque>& queue = - queues_[seq_slot]; - - // If 'seq_slot' doesn't have any requests then change the - // request to send dummy/null input tensors for this - // slot. We need this so that other requests stay in the - // correct slot. 
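The padding logic that follows can be summarized as a small predicate: a slot contributes a real request only when it has one queued and that request is shape-compatible with the batch; otherwise it contributes a "null" padding request so the other requests keep their slot positions. The sketch below is a simplified illustration with hypothetical names, not the deleted code.

```cpp
// Toy predicate for Direct batching: decide whether a slot must be padded
// with a null request instead of contributing its queued request.
#include <deque>

template <typename Request>
bool UseNullPadding(
    const std::deque<Request>& slot_queue, bool shapes_must_match,
    bool shape_matches_batch)
{
  if (slot_queue.empty()) {
    return true;  // nothing ready in this slot, keep its position with padding
  }
  if (shapes_must_match && !shape_matches_batch) {
    return true;  // request cannot join this batch, pad instead
  }
  return false;  // use the real request at the front of the queue
}
```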
- if (queue.empty()) { - use_null_request = true; - } - // If there are one or more tensors that don't support - // ragged batch, then don't allow a request into an existing - // batch if shape differs. - else if (required_equal_inputs.Initialized() && check_input) { - if (!required_equal_inputs.HasEqualInputs(queue.front())) { - use_null_request = true; - } - } - - // Use null-request if necessary otherwise use the next - // request in the queue... - if (use_null_request) { - std::unique_ptr ni( - InferenceRequest::CopyAsNull(*null_irequest)); - // Note that when the not-ready control input of the - // request is "true" the model can't assume that any - // other inputs are meaningful, including CORRID. So we - // just use zero for that. - SetControlTensors( - ni, seq_slot, 0 /* corrid */, true /* not_ready */); - - // This should be executed only if the model has a states section. - if (!base_->StateOutputConfigMap().empty()) { - // For NULL requests we will be using a dummy state instead of the - // real state stored in Triton. When the model is using variable - // dimensions and batching, the null request's input state shapes - // may be different from the actual shapes of the state for that - // sequence. We create a dummy state in order to avoid corrupting - // the actual state of the sequence. - std::shared_ptr sequence_states( - new SequenceStates); - sequence_states->SetNullSequenceStates( - null_irequest->GetSequenceStates()); - ni->SetSequenceStates(sequence_states); - } - - curr_payload_->AddRequest(std::move(ni)); - } else { - std::unique_ptr& irequest = queue.front(); - - // Set the control tensor values in the request. - SetControlTensors(irequest, seq_slot, irequest->CorrelationId()); - - // Update the implicit state and set the input state tensors. - UpdateImplicitState(irequest, seq_slot); - - if ((irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END) != - 0) { - end_of_sequence = true; - } - curr_payload_->AddRequest(std::move(irequest)); - - queue.pop_front(); - } - - if (curr_payload_->GetState() == Payload::State::UNINITIALIZED) { - curr_payload_->SetState(Payload::State::READY); - } - - // If the sequence has ended then attempt to refill the - // sequence slot with a sequence from the backlog. If - // there is no backlog show that the slot is no longer - // active. - if (end_of_sequence) { - LOG_VERBOSE(1) << "End sequence CORRID " - << seq_slot_correlation_ids_[seq_slot] - << " in batcher " << batcher_idx_ << ", slot " - << seq_slot; - - // Should never be anything in a queue after the END - // marker. If it happens that means we will clobber - // that request if/when we swap in a backlog sequence - // in ReleaseSequenceSlot below. - if (!queue.empty()) { - LOG_ERROR << "internal: unexpected requests after sequence " - "end in slot " - << seq_slot; - } - - SequenceBatchScheduler::BatcherSequenceSlot batcher_seq_slot( - batcher_idx_, seq_slot); - seq_slot_correlation_ids_[seq_slot] = - base_->ReleaseSequenceSlot(batcher_seq_slot, &queue); - } - } - } - - // One or more sequences may have ended... find the new - // 'max_active_seq_slot_'. - while ((max_active_seq_slot_ >= 0) && - (!seq_slot_correlation_ids_[max_active_seq_slot_].InSequence())) { - max_active_seq_slot_--; - } - - // If no requests are to be handled, wait for notification or - // for the specified timeout before checking the queues again. 
- if (wait_microseconds > 0) { - scheduler_idle_ = true; - std::chrono::microseconds wait_timeout(wait_microseconds); - cv_.wait_for(lock, wait_timeout); - scheduler_idle_ = false; - } - } - - if (curr_payload_->GetState() == Payload::State::READY) { - // Add callback to signal the execution completion - exec_complete_ = false; - auto callback = [this]() { - { - std::unique_lock lk(payload_mu_); - exec_complete_ = true; - } - payload_cv_.notify_one(); - }; - curr_payload_->AddInternalReleaseCallback(callback); - curr_payload_->MarkSaturated(); - - // Enqueue the payload to RateLimiter - model_instance_->Model()->Server()->GetRateLimiter()->EnqueuePayload( - model_instance_->Model(), curr_payload_); - NewPayload(); - } - } // end runner loop - - LOG_VERBOSE(1) << "Stopping Direct sequence-batch scheduler thread " - << batcher_idx_ << "..."; -} - -OldestSequenceBatch::OldestSequenceBatch( - SequenceBatchScheduler* base, const uint32_t batcher_idx, - const size_t seq_slot_cnt, TritonModelInstance* model_instance, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input, - const std::shared_ptr& - start_input_overrides, - const std::shared_ptr& - end_input_overrides, - const std::shared_ptr& - startend_input_overrides, - const std::shared_ptr& - continue_input_overrides, - const std::shared_ptr& - notready_input_overrides, - bool* is_initialized) - : SequenceBatch( - base, batcher_idx, seq_slot_cnt, enforce_equal_shape_tensors, - has_optional_input, start_input_overrides, end_input_overrides, - startend_input_overrides, continue_input_overrides, - notready_input_overrides), - in_flight_(seq_slot_cnt, false), queues_(seq_slot_cnt) -{ - // Initialize to handle CORRID control. If error just exit - // now... that means the corresponding model instance will not have - // any runner and so will not get used for execution. - const auto& config = model_instance->Model()->Config(); - if (!CreateCorrelationIDControl(config)) { - *is_initialized = false; - return; - } - - // Create a dynamic batcher use to batch together sequences for - // inference. - std::set preferred_batch_sizes; - for (const auto size : - config.sequence_batching().oldest().preferred_batch_size()) { - preferred_batch_sizes.insert(size); - } - - // TODO: Provide appropriate request_cache_enable flag when caching - // is enabled for sequence models. - Status status = DynamicBatchScheduler::Create( - model_instance->Model(), model_instance, - triton::common::GetCpuNiceLevel(config), - true /* dynamic_batching_enabled */, config.max_batch_size(), - enforce_equal_shape_tensors_, true /* preserve_ordering */, - false /* response_cache_enable */, preferred_batch_sizes, - config.sequence_batching().oldest().max_queue_delay_microseconds(), - &dynamic_batcher_); - if (!status.IsOk()) { - LOG_ERROR << "failed creating dynamic sequence batcher for OldestFirst " - << batcher_idx_ << ": " << status.Message(); - *is_initialized = false; - return; - } - - *is_initialized = true; -} -OldestSequenceBatch::~OldestSequenceBatch() {} - -void -OldestSequenceBatch::CompleteAndNext(const uint32_t seq_slot) -{ - std::lock_guard lock(mu_); - - // We may enqueue 1 or more pending inferences triggered by the - // completion. If the sequence has a pending inference then it needs - // to be send to dynamic batcher since the "previous" inference just - // completed. 
If this next inference ends up being the end of the - // sequence (either from the END flag or because the sequence is - // being force-ended) then we try to fill the now-free sequence slot - // from the backlog and then send the first inference from that - // sequence to the dynamic batcher... - std::deque>& queue = queues_[seq_slot]; - bool retry = true; - while (retry) { - retry = false; - - bool release_seq_slot = false; - in_flight_[seq_slot] = false; - - // If the next sequence inference is ready in the queue then enqueue - // it in the dynamic batcher now. - if (!queue.empty()) { - auto& irequest = queue.front(); - - // If the request is null then this inference request is from - // the reaper thread indicating a timed-out sequence. Mark that - // the sequence slot should be released but otherwise do - // nothing. - if (irequest == nullptr) { - LOG_VERBOSE(1) << irequest->LogRequest() - << "force-end sequence in batcher " << batcher_idx_ - << ", slot " << seq_slot; - release_seq_slot = true; - } else { - const InferenceRequest::SequenceId& correlation_id = - irequest->CorrelationId(); - - // After handling the last inference in a sequence we must - // release the sequence slot to make it available to another - // sequence. - if ((irequest->Flags() & TRITONSERVER_REQUEST_FLAG_SEQUENCE_END) != 0) { - LOG_VERBOSE(1) << irequest->LogRequest() << "end sequence CORRID " - << correlation_id << " in batcher " << batcher_idx_ - << ", slot " << seq_slot; - release_seq_slot = true; - } - - // Add the appropriate control tensor values to the request. - SetControlTensors(irequest, seq_slot, correlation_id); - - // Update the implicit state and set the input state tensors. - UpdateImplicitState(irequest, seq_slot); - - LOG_VERBOSE(1) << irequest->LogRequest() - << "issue to dynamic batcher CORRID " << correlation_id - << " in batcher " << batcher_idx_ << ", slot " - << seq_slot; - in_flight_[seq_slot] = true; - - irequest->AddInternalReleaseCallback( - [this, seq_slot]() { CompleteAndNext(seq_slot); }); - - dynamic_batcher_->Enqueue(irequest); - } - - queue.pop_front(); - } - - // If releasing the sequence slot then the sequence queue should be - // empty and we can now assign a new sequence to the queue (from the - // backlog). - if (release_seq_slot) { - // Should never be anything in a queue after the END marker. If it - // happens that means we will clobber that request if/when we swap - // in a backlog sequence in ReleaseSequenceSlot below. - if (!queue.empty()) { - LOG_ERROR << "internal: unexpected requests after sequence end in slot " - << seq_slot; - } - - SequenceBatchScheduler::BatcherSequenceSlot batcher_seq_slot( - batcher_idx_, seq_slot); - const InferenceRequest::SequenceId& released_cid = - base_->ReleaseSequenceSlot(batcher_seq_slot, &queue); - - if (released_cid.InSequence()) { - LOG_VERBOSE(1) << "Enqueued new sequence containing " << queue.size() - << " requests into OldestFirst batcher " << batcher_idx_ - << ", slot " << seq_slot; - - // If an inference is already in-flight in the dynamic batcher - // in this sequence slot then can't process the new queue - // inferences right now, because the in-flight request is - // using slot resources like the CORRID override map. - if (!in_flight_[seq_slot]) { - retry = true; - } - } - } - } -} - -void -OldestSequenceBatch::Enqueue( - const uint32_t seq_slot, const InferenceRequest::SequenceId& correlation_id, - std::unique_ptr& request) -{ - // Queue the new request... 
if there isn't already a request in - // flight for this sequence then send one to the dynamic batcher - // immediately. - bool in_flight; - { - std::lock_guard lock(mu_); - - std::deque>& queue = queues_[seq_slot]; - queue.emplace_back(std::move(request)); - in_flight = in_flight_[seq_slot]; - } - - if (!in_flight) { - CompleteAndNext(seq_slot); - } -} -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/sequence_batch_scheduler.h b/3rdparty/core-r22.12/src/sequence_batch_scheduler.h deleted file mode 100644 index 44b7594717b2610917c78e24eb5522a83077b954..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/sequence_batch_scheduler.h +++ /dev/null @@ -1,399 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include "backend_model.h" -#include "backend_model_instance.h" -#include "model_config.pb.h" -#include "rate_limiter.h" -#include "scheduler.h" -#include "scheduler_utils.h" -#include "sequence_state.h" -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -class SequenceBatch; - -// Scheduler that implements batching across sequences of correlated -// inferences. -class SequenceBatchScheduler : public Scheduler { - public: - using ControlInputs = std::vector>; - - SequenceBatchScheduler() = default; - ~SequenceBatchScheduler(); - - // Create a scheduler to support a given number of runners and a run - // function to call when a request is scheduled. 
- static Status Create( - TritonModel* model, - const std::unordered_map& enforce_equal_shape_tensors, - std::unique_ptr* scheduler); - - // \see Scheduler::Enqueue() - Status Enqueue(std::unique_ptr& request) override; - - // \see Scheduler::InflightInferenceCount() - size_t InflightInferenceCount() override - { - std::unique_lock lock(mu_); - return sequence_to_batcherseqslot_map_.size(); - } - - // \see Scheduler::Stop() - void Stop() override { stop_ = true; } - - // A batcher-sequence_slot combination. The batcher is represented - // by the index into 'batchers_'. - struct BatcherSequenceSlot { - BatcherSequenceSlot() = default; - BatcherSequenceSlot(const BatcherSequenceSlot&) = default; - BatcherSequenceSlot(size_t b, uint32_t s) : batcher_idx_(b), seq_slot_(s) {} - size_t batcher_idx_; - uint32_t seq_slot_; - }; - - // Fill a sequence slot with a sequence from the backlog or show - // that the sequence slot is no longer being used. - InferenceRequest::SequenceId ReleaseSequenceSlot( - const BatcherSequenceSlot& seq_slot, - std::deque>* requests); - - // For debugging/testing, batcher reports how many waiting requests - // and returns true if the batcher should continue waiting. - bool DelayScheduler( - const uint32_t batcher_idx, const size_t cnt, const size_t total); - - const std::unordered_map< - std::string, const inference::ModelSequenceBatching_State&>& - StateOutputConfigMap() - { - return state_output_config_map_; - } - - size_t MaxBatchSize() { return max_batch_size_; } - const std::unordered_map& - InitialState() - { - return initial_state_; - } - - private: - void ReaperThread(const int nice); - - Status CreateBooleanControlTensors( - const inference::ModelConfig& config, - std::shared_ptr* start_input_overrides, - std::shared_ptr* end_input_overrides, - std::shared_ptr* startend_input_overrides, - std::shared_ptr* continue_input_overrides, - std::shared_ptr* notready_input_overrides); - - Status GenerateInitialStateData( - const inference::ModelSequenceBatching_InitialState& initial_state, - const inference::ModelSequenceBatching_State& state, TritonModel* model); - - struct BatcherSequenceSlotCompare { - bool operator()( - const BatcherSequenceSlot& a, const BatcherSequenceSlot& b) const - { - return a.seq_slot_ > b.seq_slot_; - } - }; - - // The max_sequence_idle_microseconds value for this scheduler. - uint64_t max_sequence_idle_microseconds_; - - bool stop_; - - // Mutex - std::mutex mu_; - - // The reaper thread - std::unique_ptr reaper_thread_; - std::condition_variable reaper_cv_; - bool reaper_thread_exit_; - - // The SequenceBatchs being managed by this scheduler. - std::vector> batchers_; - - // Map from a request's correlation ID to the BatcherSequenceSlot - // assigned to that correlation ID. - using BatcherSequenceSlotMap = - std::unordered_map; - BatcherSequenceSlotMap sequence_to_batcherseqslot_map_; - - // Map from a request's correlation ID to the backlog queue - // collecting requests for that correlation ID. - using BacklogMap = std::unordered_map< - InferenceRequest::SequenceId, - std::shared_ptr>>>; - BacklogMap sequence_to_backlog_map_; - - // The ordered backlog of sequences waiting for a free sequenceslot. - std::deque>>> - backlog_queues_; - - // The batcher/sequence-slot locations ready to accept a new - // sequence. Ordered from lowest sequence-slot-number to highest so - // that all batchers grow at the same rate and attempt to remain as - // small as possible. 
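The comparator used by the declaration that follows is worth a note: `std::priority_queue` is a max-heap, so a "greater-than" comparison on the slot number makes the *lowest* numbered free slot pop first, which is what keeps batchers filled from the bottom. The snippet below is an illustrative standalone demo with simplified types, not part of the deleted header.

```cpp
// Demonstrates min-heap ordering of free sequence slots via a greater-than
// comparator on the slot number.
#include <cstddef>
#include <cstdint>
#include <queue>
#include <vector>

struct SlotRef {
  std::size_t batcher_idx;
  uint32_t seq_slot;
};

struct LowestSlotFirst {
  bool operator()(const SlotRef& a, const SlotRef& b) const
  {
    return a.seq_slot > b.seq_slot;  // max-heap + greater-than == min-heap
  }
};

int main()
{
  std::priority_queue<SlotRef, std::vector<SlotRef>, LowestSlotFirst> ready;
  ready.push({0, 3});
  ready.push({0, 0});
  ready.push({1, 1});
  // ready.top().seq_slot == 0: the smallest free slot is reused first.
  return static_cast<int>(ready.top().seq_slot);
}
```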
- std::priority_queue< - BatcherSequenceSlot, std::vector, - BatcherSequenceSlotCompare> - ready_batcher_seq_slots_; - - // For each correlation ID the most recently seen timestamp, in - // microseconds, for a request using that correlation ID. - std::unordered_map - correlation_id_timestamps_; - - // Used for debugging/testing. - size_t backlog_delay_cnt_; - std::vector queue_request_cnts_; - - // IO mapping between the output state name and the state configuration. - std::unordered_map - state_output_config_map_; - size_t max_batch_size_; - - // Initial state used for implicit state. - std::unordered_map - initial_state_; -}; - -// Base class for a scheduler that implements a particular scheduling -// strategy for a model instance. -class SequenceBatch { - public: - SequenceBatch( - SequenceBatchScheduler* base, const uint32_t batcher_idx, - const size_t seq_slot_cnt, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input, - const std::shared_ptr& - start_input_overrides, - const std::shared_ptr& - end_input_overrides, - const std::shared_ptr& - startend_input_overrides, - const std::shared_ptr& - continue_input_overrides, - const std::shared_ptr& - notready_input_overrides); - virtual ~SequenceBatch() = default; - - // Enqueue a request into the appropriate queue for the requested - // sequence slot. This function takes ownership of 'request' so on - // request 'request' will be nullptr. - virtual void Enqueue( - const uint32_t seq_slot, - const InferenceRequest::SequenceId& correlation_id, - std::unique_ptr& request) = 0; - - protected: - bool CreateCorrelationIDControl(const inference::ModelConfig& config); - void SetControlTensors( - std::unique_ptr& irequest, const int32_t seq_slot, - const InferenceRequest::SequenceId& corr_id, - const bool not_ready = false); - - // Update the implicit state and set the required input states. - void UpdateImplicitState( - std::unique_ptr& irequest, const int32_t seq_slot); - - // The controlling scheduler. - SequenceBatchScheduler* const base_; - - // The index of this batcher within the controlling scheduler. - const uint32_t batcher_idx_; - - // The number of candidate sequence slots. - const size_t seq_slot_cnt_; - - // The input tensors that require shape checking before being - // allowed in a batch. As a map from the tensor name to a bool. If - // tensor is in map then its shape must match shape of same tensor - // in requests already in the batch. If value is "true" then - // additional tensor is treated as a shape tensor and the values - // contained in the shape tensor must match same tensor already in - // the batch. - const std::unordered_map enforce_equal_shape_tensors_; - - // Store information on whether the model contains optional inputs. - bool has_optional_input_; - - // The control values, delivered as input tensors, that should be - // used when starting a sequence, continuing a sequence, ending a - // sequence, and showing that a sequence has not input available. - std::shared_ptr start_input_overrides_; - std::shared_ptr end_input_overrides_; - std::shared_ptr - startend_input_overrides_; - std::shared_ptr - continue_input_overrides_; - std::shared_ptr - notready_input_overrides_; - - // The correlation ID override. Empty if model does not specify the - // CONTROL_SEQUENCE_CORRID control. - std::shared_ptr seq_slot_corrid_override_; - - // For each sequence slot store the optional state i/o tensors. 
- std::vector> sequence_states_; -}; - -// Scheduler that implements the Direct sequence scheduling strategy -// for a model instance. -class DirectSequenceBatch : public SequenceBatch { - public: - DirectSequenceBatch( - SequenceBatchScheduler* base, const uint32_t batcher_idx, - const size_t seq_slot_cnt, TritonModelInstance* model_instance, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input, - const std::shared_ptr& - start_input_overrides, - const std::shared_ptr& - end_input_overrides, - const std::shared_ptr& - startend_input_overrides, - const std::shared_ptr& - continue_input_overrides, - const std::shared_ptr& - notready_input_overrides, - bool* is_initialized); - ~DirectSequenceBatch(); - - void Enqueue( - const uint32_t seq_slot, - const InferenceRequest::SequenceId& correlation_id, - std::unique_ptr& request) override; - - private: - void BatcherThread(const int nice); - void NewPayload(); - - std::shared_ptr curr_payload_; - TritonModelInstance* model_instance_; - - // The thread scheduling requests that are queued in this batch. - std::unique_ptr scheduler_thread_; - bool scheduler_thread_exit_; - bool scheduler_idle_; - - // Mutex protecting correlation queues, etc. - std::mutex mu_; - std::condition_variable cv_; - - // Execution state of the last enqueued payload - bool exec_complete_; - - // Mutex protecting execution state of payload - std::mutex payload_mu_; - std::condition_variable payload_cv_; - - // Queues holding inference requests. There are 'seq_slot_cnt' - // queues, one for each sequence slot where requests assigned to - // that slot are enqueued to wait for inferencing. - std::vector>> queues_; - - // Is each sequence slot active or not? A zero or empty value indicates - // inactive, a non-zero/non-empty value indicates active and is the - // correlation ID of the sequence active in the slot. An empty - // queue for a sequence slot does not mean it's inactive... it - // could just not have any requests pending at the moment. - std::vector seq_slot_correlation_ids_; - - // The maximum active sequence slot. A value of -1 indicates that - // no slots are active in the model. - int32_t max_active_seq_slot_; - - size_t max_batch_size_; - float minimum_slot_utilization_; - uint64_t pending_batch_delay_ns_; -}; - -// Scheduler that implements the oldest-first sequence scheduling -// strategy for a model instance. -class OldestSequenceBatch : public SequenceBatch { - public: - OldestSequenceBatch( - SequenceBatchScheduler* base, const uint32_t batcher_idx, - const size_t seq_slot_cnt, TritonModelInstance* model_instance, - const std::unordered_map& enforce_equal_shape_tensors, - const bool has_optional_input, - const std::shared_ptr& - start_input_overrides, - const std::shared_ptr& - end_input_overrides, - const std::shared_ptr& - startend_input_overrides, - const std::shared_ptr& - continue_input_overrides, - const std::shared_ptr& - notready_input_overrides, - bool* is_initialized); - ~OldestSequenceBatch(); - - void Enqueue( - const uint32_t seq_slot, - const InferenceRequest::SequenceId& correlation_id, - std::unique_ptr& request) override; - - private: - void CompleteAndNext(const uint32_t seq_slot); - - // The dynamic batcher for this scheduler - std::unique_ptr dynamic_batcher_; - - TritonModelInstance* model_instance_; - - // Mutex protecting queues, etc. - std::mutex mu_; - - // For each sequence slot, true if there is a request for that - // sequence in-flight in the dynamic batcher. 
Used to ensure that at - // most one request from each sequence can be scheduled at a time. - std::vector in_flight_; - - // Queues holding inference requests. There are 'seq_slot_cnt' - // queues, one for each sequence slot where requests assigned to - // that slot are enqueued to wait for inferencing. - std::vector>> queues_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/sequence_state.cc b/3rdparty/core-r22.12/src/sequence_state.cc deleted file mode 100644 index af0605cf41c6c96d38971ee0c9189e8913424c8f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/sequence_state.cc +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include "sequence_state.h" - -#include "memory.h" -#include "triton/common/logging.h" - -namespace triton { namespace core { - -SequenceState::SequenceState() : data_(new MemoryReference) {} - -SequenceState::SequenceState( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count) - : name_(name), datatype_(datatype), shape_(shape, shape + dim_count), - data_(new MemoryReference) -{ -} - -SequenceState::SequenceState( - const std::string& name, const inference::DataType datatype, - const std::vector& shape) - : name_(name), datatype_(datatype), shape_(shape), - data_(new MemoryReference) -{ -} - -Status -SequenceState::SetData(const std::shared_ptr& data) -{ - if (data_->TotalByteSize() != 0) { - return Status( - Status::Code::INVALID_ARG, - "state '" + name_ + "' already has data, can't overwrite"); - } - - data_ = data; - return Status::Success; -} - -Status -SequenceState::RemoveAllData() -{ - data_ = std::make_shared(); - return Status::Success; -} - -Status -SequenceState::SetStringDataToZero() -{ - if (Data()->TotalByteSize() % 4 != 0) { - return Status( - Status::Code::INVALID_ARG, - "The total byte size must be a multiple of 4 when setting the " - "sequence state to zero."); - } - - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - - const std::shared_ptr& memory = - reinterpret_cast&>(Data()); - char* buffer = memory->MutableBuffer(&memory_type, &memory_type_id); - memset(buffer, 0, Data()->TotalByteSize()); - - return Status::Success; -} - -Status -SequenceStates::Initialize( - const std::unordered_map< - std::string, const inference::ModelSequenceBatching_State&>& - state_output_config_map, - const size_t max_batch_size, - const std::unordered_map& initial_state) -{ - input_states_.clear(); - output_states_.clear(); - - for (auto& state : state_output_config_map) { - auto& state_config = state.second; - - std::vector dims; - if (max_batch_size != 0) { - dims.push_back(1); - } - - // Convert the variable dimensions to 1 for the first request. - for (auto& dim : state_config.dims()) { - if (dim == -1) { - dims.push_back(1); - } else { - dims.push_back(dim); - } - } - - std::shared_ptr data; - auto initial_state_it = initial_state.find(state_config.input_name()); - if (initial_state_it != initial_state.end()) { - data = std::make_shared( - initial_state_it->second.data_->TotalByteSize(), - TRITONSERVER_MEMORY_CPU, 0); - - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - char* dst_buffer = data->MutableBuffer(&memory_type, &memory_type_id); - char* initial_state_buffer = - initial_state_it->second.data_->MutableBuffer( - &memory_type, &memory_type_id); - - memcpy( - dst_buffer, initial_state_buffer, - initial_state_it->second.data_->TotalByteSize()); - } else { - size_t state_size; - if (state.second.data_type() == inference::DataType::TYPE_STRING) { - auto element_count = triton::common::GetElementCount(dims); - // Total number of bytes required is equal to the element count - // multiplied by 4. 
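The "element count multiplied by 4" sizing on the next line follows from how Triton serializes string (TYPE_STRING) tensors: each element is a 4-byte length prefix followed by that many bytes, so an all-zero buffer of `4 * element_count` decodes as that many empty strings. The sketch below only illustrates the layout; it is not code from the deleted file.

```cpp
// Serialize a string tensor the way Triton represents TYPE_STRING data:
// [uint32 length][bytes] per element. All-empty elements need 4 bytes each.
#include <cstdint>
#include <string>
#include <vector>

std::vector<char> SerializeStringTensor(const std::vector<std::string>& elements)
{
  std::vector<char> buffer;
  for (const auto& s : elements) {
    const uint32_t len = static_cast<uint32_t>(s.size());
    const char* len_bytes = reinterpret_cast<const char*>(&len);
    buffer.insert(buffer.end(), len_bytes, len_bytes + sizeof(len));
    buffer.insert(buffer.end(), s.begin(), s.end());
  }
  return buffer;
}

// SerializeStringTensor({"", "", ""}) yields 12 zero bytes, i.e. the same
// content produced by zeroing a 4 * element_count buffer.
```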
- state_size = 4 * element_count; - } else { - state_size = - triton::common::GetByteSize(state.second.data_type(), dims); - } - data = std::make_shared( - state_size, TRITONSERVER_MEMORY_CPU, 0); - } - - const auto& input_pair = input_states_.emplace( - std::piecewise_construct, - std::forward_as_tuple(state_config.input_name()), - std::forward_as_tuple(new SequenceState( - state_config.input_name(), state.second.data_type(), dims))); - - if (!input_pair.second) { - LOG_WARNING - << "Detected duplicate 'input_name' in the state configuration: '" - << state_config.input_name() - << ".' This state configuration will be ignored."; - continue; - } - - auto& input_tensor = input_pair.first->second; - RETURN_IF_ERROR(input_tensor->SetData(data)); - if (input_tensor->DType() == inference::DataType::TYPE_STRING) { - RETURN_IF_ERROR(input_tensor->SetStringDataToZero()); - } - - const auto& output_pair = output_states_.emplace( - std::piecewise_construct, - std::forward_as_tuple(state_config.output_name()), - std::forward_as_tuple()); - if (!output_pair.second) { - // Remove the corresponding state from the input_states_map - input_states_.erase(state_config.input_name()); - LOG_WARNING << "Detected duplicate 'output_name' in the state " - "configuration: '" - << state_config.output_name() - << "'. This state configuration will be ignored."; - - continue; - } - } - - return Status::Success; -} - -Status -SequenceStates::OutputState( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count, - SequenceState** output_state) -{ - const auto& output_state_itr = output_states_.find(name); - - // If the state name is not valid return an error. - if (output_state_itr == output_states_.end()) { - return Status( - Status::Code::INVALID_ARG, - "state '" + name + "' is not a valid state name."); - } - - if (output_states_[name] == nullptr) { - output_states_[name] = std::unique_ptr( - new SequenceState(name, datatype, shape, dim_count)); - } else { - // A new SequenceState is created here in case the shape for the new output - // state is different from the shape of the originally stored state. - std::unique_ptr output_state( - new SequenceState(name, datatype, shape, dim_count)); - - // Transfer the previously allocated buffer to the new output_state. - output_state->SetData(output_states_[name]->Data()); - output_states_[name] = std::move(output_state); - } - - auto& output_state_r = output_states_[name]; - size_t iter_advance = - std::distance(output_states_.begin(), output_states_.find(name)); - - // Find the input state corresponding to this output state. 
- auto input_states_itr = input_states_.begin(); - std::advance(input_states_itr, iter_advance); - auto& input_state_r = input_states_[input_states_itr->first]; - - if (output_state != nullptr) { - *output_state = output_states_[name].get(); - } - - output_state_r->SetStateUpdateCallback([&output_state_r, &input_state_r]() { - // Swap the internal memory if the size of the input and output state is - // equal - - if (output_state_r->Data()->TotalByteSize() == - input_state_r->Data()->TotalByteSize()) { - std::shared_ptr temp_memory = input_state_r->Data(); - RETURN_IF_ERROR(input_state_r->RemoveAllData()); - RETURN_IF_ERROR(input_state_r->SetData(output_state_r->Data())); - RETURN_IF_ERROR(output_state_r->RemoveAllData()); - RETURN_IF_ERROR(output_state_r->SetData(temp_memory)); - } else { - // If the size of output state is different from the input state, allocate - // a new memory for the input state with the same size as output state. - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - - const std::shared_ptr& input_memory = - reinterpret_cast&>( - input_state_r->Data()); - - input_memory->MutableBuffer(&memory_type, &memory_type_id); - std::shared_ptr memory = - std::make_shared( - output_state_r->Data()->TotalByteSize(), memory_type, - memory_type_id); - RETURN_IF_ERROR(input_state_r->RemoveAllData()); - RETURN_IF_ERROR(input_state_r->SetData(output_state_r->Data())); - RETURN_IF_ERROR(output_state_r->RemoveAllData()); - RETURN_IF_ERROR(output_state_r->SetData(memory)); - } - - // Update the shape and data type of the output state if it doesn't match - // the input state. - if (input_state_r->Shape() != output_state_r->Shape()) { - *input_state_r->MutableShape() = output_state_r->Shape(); - } - - if (input_state_r->DType() != output_state_r->DType()) { - *input_state_r->MutableDType() = output_state_r->DType(); - } - - return Status::Success; - }); - - return Status::Success; -} - -Status -SequenceStates::OutputState( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, SequenceState** output_state) -{ - return OutputState(name, datatype, shape.data(), shape.size(), output_state); -} - -std::shared_ptr -SequenceStates::CopyAsNull(const std::shared_ptr& from) -{ - std::shared_ptr lsequence_states; - if (from != nullptr) { - lsequence_states.reset(new SequenceStates); - for (auto& from_input_state : from->InputStates()) { - auto& from_input_state_tensor = from_input_state.second; - const auto& input_pair = lsequence_states->input_states_.emplace( - std::piecewise_construct, - std::forward_as_tuple(from_input_state_tensor->Name()), - std::forward_as_tuple(new SequenceState( - from_input_state_tensor->Name(), from_input_state_tensor->DType(), - from_input_state_tensor->Shape()))); - - auto& input_tensor = input_pair.first->second; - std::shared_ptr data; - if (from_input_state_tensor->DType() == - inference::DataType::TYPE_STRING) { - // Use all-zero input states for null requests. 
- auto element_count = - triton::common::GetElementCount(from_input_state_tensor->Shape()); - auto state_size = 4 * element_count; - data = std::make_shared( - state_size, TRITONSERVER_MEMORY_CPU, 0); - } else { - data = std::make_shared( - from_input_state_tensor->Data()->TotalByteSize(), - TRITONSERVER_MEMORY_CPU, 0); - } - - input_tensor->SetData(data); - if (input_tensor->DType() == inference::DataType::TYPE_STRING) { - input_tensor->SetStringDataToZero(); - } - } - - for (auto& from_output_state : from->OutputStates()) { - lsequence_states->output_states_.emplace( - std::piecewise_construct, - std::forward_as_tuple(from_output_state.first), - std::forward_as_tuple()); - } - } - return lsequence_states; -} -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/sequence_state.h b/3rdparty/core-r22.12/src/sequence_state.h deleted file mode 100644 index a2d0b14244799b6e3b1c4aef931163315e7eb880..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/sequence_state.h +++ /dev/null @@ -1,168 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include -#include "memory.h" -#include "status.h" -#include "triton/common/model_config.h" - -#pragma once - -namespace triton { namespace core { - -// -// Sequence state tensors. -// -class SequenceState { - public: - SequenceState(); - SequenceState( - const std::string& name, const inference::DataType datatype, - const std::vector& shape); - SequenceState( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count); - - // The name of the state tensor. - const std::string& Name() const { return name_; } - - // Data type of the state tensor. - inference::DataType DType() const { return datatype_; } - - // Mutable data type of the state tensor. - inference::DataType* MutableDType() { return &datatype_; } - - // The shape of the state tensor after normalization. 
- const std::vector& Shape() const { return shape_; } - std::vector* MutableShape() { return &shape_; } - - // The data for this shape. - std::shared_ptr& Data() { return data_; } - - // Set the data for this shape. Error if state already has some - // data. - Status SetData(const std::shared_ptr& data); - - // Sets state tensors that have type string to zero - Status SetStringDataToZero(); - - // Remove all existing data for the state. - Status RemoveAllData(); - - // Set the state update callback. - void SetStateUpdateCallback(std::function&& state_update_cb) - { - state_update_cb_ = std::move(state_update_cb); - } - - // Call the state update callback. This function will be called when - // TRITONBACKEND_StateUpdate is called. - Status Update() { return state_update_cb_(); } - - private: - DISALLOW_COPY_AND_ASSIGN(SequenceState); - std::string name_; - inference::DataType datatype_; - std::vector shape_; - std::vector batch_dim_; - std::shared_ptr data_; - std::function state_update_cb_ = []() { - // By default calling the TRITONBACKEND_StateUpdate will return an error. - return Status( - Status::Code::INVALID_ARG, - "TRITONBACKEND_StateUpdate called when sequence batching is disabled " - "or the 'states' section of the model configuration is empty."); - }; -}; - -class SequenceStates { - public: - struct InitialStateData { - InitialStateData(const std::string& state_init_name) - : state_init_name_(state_init_name) - { - } - - std::string state_init_name_; - std::shared_ptr data_; - }; - - // Initialize the state tensors according to the state model configuration. - // Will use a default value of 1 for the variable dimensions in the state - // tensor configuration. - Status Initialize( - const std::unordered_map< - std::string, const inference::ModelSequenceBatching_State&>& - state_output_config_map, - const size_t max_batch_size, - const std::unordered_map& initial_state); - - // Get a buffer holding the output state. - Status OutputState( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count, - SequenceState** output_state); - Status OutputState( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, SequenceState** output_state); - - // Create a copy of the 'from' sequence states for NULL requests. - static std::shared_ptr CopyAsNull( - const std::shared_ptr& from); - - const std::map>& InputStates() - { - return input_states_; - } - - std::map>& OutputStates() - { - return output_states_; - } - - void SetNullSequenceStates(std::shared_ptr sequence_states) - { - null_sequence_states_ = sequence_states; - is_null_request_ = true; - } - - const std::shared_ptr& NullSequenceStates() - { - return null_sequence_states_; - } - - bool IsNullRequest() { return is_null_request_; } - - private: - std::map> input_states_; - std::map> output_states_; - std::shared_ptr null_sequence_states_; - bool is_null_request_ = false; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/server.cc b/3rdparty/core-r22.12/src/server.cc deleted file mode 100644 index e313a16e2a2488b0478403a4b97849eb11bd3fed..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/server.cc +++ /dev/null @@ -1,653 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "server.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "backend_manager.h" -#include "constants.h" -#include "cuda_utils.h" -#include "model.h" -#include "model_config.pb.h" -#include "model_config_utils.h" -#include "model_repository_manager.h" -#include "pinned_memory_manager.h" -#include "repo_agent.h" -#include "triton/common/async_work_queue.h" -#include "triton/common/logging.h" -#include "triton/common/model_config.h" -#include "triton/common/table_printer.h" - -#ifdef TRITON_ENABLE_GPU -#include "cuda_memory_manager.h" -#endif // TRITON_ENABLE_GPU - -namespace triton { namespace core { - -namespace { - -// Scoped increment / decrement of atomic -class ScopedAtomicIncrement { - public: - explicit ScopedAtomicIncrement(std::atomic& counter) - : counter_(counter) - { - counter_++; - } - - ~ScopedAtomicIncrement() { counter_--; } - - private: - std::atomic& counter_; -}; - -} // namespace - -// -// InferenceServer -// -InferenceServer::InferenceServer() - : version_(TRITON_VERSION), ready_state_(ServerReadyState::SERVER_INVALID) -{ - id_ = "triton"; - extensions_.push_back("classification"); - extensions_.push_back("sequence"); - extensions_.push_back("model_repository"); - extensions_.push_back("model_repository(unload_dependents)"); - extensions_.push_back("schedule_policy"); - extensions_.push_back("model_configuration"); - extensions_.push_back("system_shared_memory"); - extensions_.push_back("cuda_shared_memory"); - extensions_.push_back("binary_tensor_data"); -#ifdef TRITON_ENABLE_STATS - extensions_.push_back("statistics"); -#endif // TRITON_ENABLE_STATS -#ifdef TRITON_ENABLE_TRACING - extensions_.push_back("trace"); -#endif // TRITON_ENABLE_TRACING -#ifdef TRITON_ENABLE_LOGGING - extensions_.push_back("logging"); -#endif // TRITON_ENABLE_LOGGING - strict_model_config_ = true; - strict_readiness_ = true; - exit_timeout_secs_ = 30; - pinned_memory_pool_size_ = 1 << 28; - buffer_manager_thread_count_ = 0; - model_load_thread_count_ = - std::max(2u, 2 
* std::thread::hardware_concurrency()); - -#ifdef TRITON_ENABLE_GPU - min_supported_compute_capability_ = TRITON_MIN_COMPUTE_CAPABILITY; -#else - min_supported_compute_capability_ = 0.0; -#endif // TRITON_ENABLE_GPU - - inflight_request_counter_ = 0; -} - -Status -InferenceServer::Init() -{ - Status status; - - ready_state_ = ServerReadyState::SERVER_INITIALIZING; - - if (model_repository_paths_.empty()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return Status( - Status::Code::INVALID_ARG, "--model-repository must be specified"); - } - - if (repoagent_dir_.empty()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return Status( - Status::Code::INVALID_ARG, "--repoagent-directory can not be empty"); - } - - status = TritonRepoAgentManager::SetGlobalSearchPath(repoagent_dir_); - if (!status.IsOk()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return status; - } - - status = TritonBackendManager::Create(&backend_manager_); - if (!status.IsOk()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return status; - } - - if (buffer_manager_thread_count_ > 0) { - status = CommonErrorToStatus(triton::common::AsyncWorkQueue::Initialize( - buffer_manager_thread_count_)); - if (!status.IsOk()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return status; - } - } - - std::unique_ptr local_rate_limiter; - bool ignore_resources_and_priority = - (rate_limit_mode_ == RateLimitMode::RL_OFF); - - status = RateLimiter::Create( - ignore_resources_and_priority, rate_limit_resource_map_, - &local_rate_limiter); - rate_limiter_ = std::move(local_rate_limiter); - if (!status.IsOk()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return status; - } - - PinnedMemoryManager::Options options(pinned_memory_pool_size_); - status = PinnedMemoryManager::Create(options); - if (!status.IsOk()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return status; - } - - if (response_cache_byte_size_ > 0) { - std::unique_ptr local_response_cache; - status = RequestResponseCache::Create( - response_cache_byte_size_, &local_response_cache); - if (!status.IsOk()) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - return status; - } - - response_cache_ = std::move(local_response_cache); - } - - -#ifdef TRITON_ENABLE_GPU - // Set the default CUDA memory pool size for GPUs where it is not - // set explicitly. - std::set supported_gpus; - if (GetSupportedGPUs(&supported_gpus, min_supported_compute_capability_) - .IsOk()) { - for (const auto gpu : supported_gpus) { - if (cuda_memory_pool_size_.find(gpu) == cuda_memory_pool_size_.end()) { - cuda_memory_pool_size_[gpu] = 1 << 26; - } - } - } - - CudaMemoryManager::Options cuda_options( - min_supported_compute_capability_, cuda_memory_pool_size_); - status = CudaMemoryManager::Create(cuda_options); - // If CUDA memory manager can't be created, just log error as the - // server can still function properly - if (!status.IsOk()) { - LOG_ERROR << status.Message(); - } -#endif // TRITON_ENABLE_GPU - - status = EnablePeerAccess(min_supported_compute_capability_); - if (!status.IsOk()) { - // failed to enable peer access is not critical, just inefficient. - LOG_WARNING << status.Message(); - } - - // Create the model manager for the repository. Unless model control - // is disabled, all models are eagerly loaded when the manager is created. 
- bool polling_enabled = (model_control_mode_ == ModelControlMode::MODE_POLL); - bool model_control_enabled = - (model_control_mode_ == ModelControlMode::MODE_EXPLICIT); - const ModelLifeCycleOptions life_cycle_options( - min_supported_compute_capability_, backend_cmdline_config_map_, - host_policy_map_, model_load_thread_count_); - status = ModelRepositoryManager::Create( - this, version_, model_repository_paths_, startup_models_, - strict_model_config_, polling_enabled, model_control_enabled, - life_cycle_options, &model_repository_manager_); - if (!status.IsOk()) { - if (model_repository_manager_ == nullptr) { - ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE; - } else { - // If error is returned while the manager is set, we assume the - // failure is due to a model not loading correctly so we just - // continue if not exiting on error. - ready_state_ = ServerReadyState::SERVER_READY; - PrintBackendAndModelSummary(); - } - } else { - ready_state_ = ServerReadyState::SERVER_READY; - PrintBackendAndModelSummary(); - } - - return status; -} - -Status -InferenceServer::Stop(const bool force) -{ - if (!force && (ready_state_ != ServerReadyState::SERVER_READY)) { - return Status::Success; - } - - ready_state_ = ServerReadyState::SERVER_EXITING; - - if (model_repository_manager_ == nullptr) { - LOG_INFO << "No server context available. Exiting immediately."; - return Status::Success; - } else { - LOG_INFO << "Waiting for in-flight requests to complete."; - } - - Status status = model_repository_manager_->StopAllModels(); - if (!status.IsOk()) { - LOG_ERROR << status.Message(); - } - - // Wait for all in-flight non-inference requests to complete and all - // loaded models to unload, or for the exit timeout to expire. - uint32_t exit_timeout_iters = exit_timeout_secs_; - bool unloading_model = false; - while (true) { - if (!unloading_model) { - // Check if all in-flight inference requests / sequences are completed - const auto& inflight_status = model_repository_manager_->InflightStatus(); - LOG_INFO << "Timeout " << exit_timeout_iters << ": Found " - << inflight_status.size() - << " model versions that have in-flight inferences"; - for (const auto& inflight : inflight_status) { - LOG_INFO << "Model '" << std::get<0>(inflight) << "' " - << "(version " << std::get<1>(inflight) << ") has " - << std::get<2>(inflight) << " in-flight inferences"; - } - - if (inflight_status.size() == 0) { - unloading_model = true; - status = model_repository_manager_->UnloadAllModels(); - if (!status.IsOk()) { - LOG_ERROR << status.Message(); - } else { - LOG_INFO << "All models are stopped, unloading models"; - continue; - } - } - } else { - const auto& live_models = model_repository_manager_->LiveModelStates(); - - LOG_INFO << "Timeout " << exit_timeout_iters << ": Found " - << live_models.size() << " live models and " - << inflight_request_counter_ - << " in-flight non-inference requests"; - if (LOG_VERBOSE_IS_ON(1)) { - for (const auto& m : live_models) { - for (const auto& v : m.second) { - LOG_VERBOSE(1) << m.first << " v" << v.first << ": " - << ModelReadyStateString(v.second.first); - } - } - } - - if ((live_models.size() == 0) && (inflight_request_counter_ == 0)) { - return Status::Success; - } - } - if (exit_timeout_iters <= 0) { - break; - } - - exit_timeout_iters--; - std::this_thread::sleep_for(std::chrono::seconds(1)); - } - - return Status( - Status::Code::INTERNAL, "Exit timeout expired. 
Exiting immediately."); -} - -Status -InferenceServer::PollModelRepository() -{ - LOG_VERBOSE(1) << "Polling model repository"; - - // Look for changes and update the loaded model configurations - // appropriately. - if (ready_state_ == ServerReadyState::SERVER_READY) { - ScopedAtomicIncrement inflight(inflight_request_counter_); - RETURN_IF_ERROR(model_repository_manager_->PollAndUpdate()); - } - - return Status::Success; -} - -Status -InferenceServer::IsLive(bool* live) -{ - *live = false; - - if (ready_state_ == ServerReadyState::SERVER_EXITING) { - return Status(Status::Code::UNAVAILABLE, "Server exiting"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - // Server is considered live if it can respond to this health - // request and it was able to initialize. - *live = - ((ready_state_ != ServerReadyState::SERVER_INVALID) && - (ready_state_ != ServerReadyState::SERVER_INITIALIZING) && - (ready_state_ != ServerReadyState::SERVER_FAILED_TO_INITIALIZE)); - return Status::Success; -} - -Status -InferenceServer::IsReady(bool* ready) -{ - *ready = false; - - if (ready_state_ == ServerReadyState::SERVER_EXITING) { - return Status(Status::Code::UNAVAILABLE, "Server exiting"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - // Server is considered ready if it is in the ready state. - // Additionally can report ready only when all models are ready. - *ready = (ready_state_ == ServerReadyState::SERVER_READY); - if (*ready && strict_readiness_) { - // Strict readiness... get the model status and make sure all - // models are ready. - const auto model_versions = model_repository_manager_->ModelStates(); - - for (const auto& mv : model_versions) { - // If a model status is present but no version status, - // the model is not ready as there is no proper version to be served - if (mv.second.size() == 0) { - *ready = false; - goto strict_done; - } - for (const auto& vs : mv.second) { - // Okay if model is not ready due to unload - if ((vs.second.first != ModelReadyState::READY) && - (vs.second.second != "unloaded")) { - *ready = false; - goto strict_done; - } - } - } - strict_done:; - } - - return Status::Success; -} - -Status -InferenceServer::ModelIsReady( - const std::string& model_name, const int64_t model_version, bool* ready) -{ - *ready = false; - - if (ready_state_ != ServerReadyState::SERVER_READY) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - std::shared_ptr model; - if (GetModel(model_name, model_version, &model).IsOk()) { - ModelReadyState state; - if (model_repository_manager_ - ->ModelState(model_name, model->Version(), &state) - .IsOk()) { - *ready = (state == ModelReadyState::READY); - } - } - - return Status::Success; -} - -Status -InferenceServer::ModelReadyVersions( - const std::string& model_name, std::vector* versions) -{ - if (ready_state_ != ServerReadyState::SERVER_READY) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - const auto version_states = - model_repository_manager_->VersionStates(model_name); - for (const auto& pr : version_states) { - if (pr.second.first == ModelReadyState::READY) { - versions->push_back(pr.first); - } - } - - return Status::Success; -} - -Status -InferenceServer::ModelReadyVersions( - std::map>* ready_model_versions) -{ - if (ready_state_ != ServerReadyState::SERVER_READY) { - return Status(Status::Code::UNAVAILABLE, "Server not 
ready"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - const auto model_versions = - model_repository_manager_->LiveModelStates(true /* strict_readiness */); - - ready_model_versions->clear(); - std::vector versions; - for (const auto& mv_pair : model_versions) { - for (const auto& vs_pair : mv_pair.second) { - versions.emplace_back(vs_pair.first); - } - ready_model_versions->emplace(mv_pair.first, std::move(versions)); - } - - return Status::Success; -} - -Status -InferenceServer::RepositoryIndex( - const bool ready_only, - std::vector* index) -{ - if (ready_state_ != ServerReadyState::SERVER_READY) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - return model_repository_manager_->RepositoryIndex(ready_only, index); -} - -Status -InferenceServer::InferAsync(std::unique_ptr& request) -{ - // Allow inference request while server exiting to provide graceful - // completion of inference sequence that spans multiple requests. - if ((ready_state_ != ServerReadyState::SERVER_READY) && - (ready_state_ != ServerReadyState::SERVER_EXITING)) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - -#ifdef TRITON_ENABLE_STATS - request->CaptureRequestStartNs(); - INFER_TRACE_ACTIVITY( - request->Trace(), TRITONSERVER_TRACE_REQUEST_START, - request->RequestStartNs()); -#endif // TRITON_ENABLE_STATS - - return InferenceRequest::Run(request); -} - -Status -InferenceServer::LoadModel( - const std::unordered_map< - std::string, std::vector>& models) -{ - if (ready_state_ != ServerReadyState::SERVER_READY) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - auto action_type = ActionType::LOAD; - return model_repository_manager_->LoadUnloadModel( - models, action_type, false /* unload_dependents */); -} - -Status -InferenceServer::UnloadModel( - const std::string& model_name, const bool unload_dependents) -{ - if (ready_state_ != ServerReadyState::SERVER_READY) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - - ScopedAtomicIncrement inflight(inflight_request_counter_); - - auto action_type = ActionType::UNLOAD; - return model_repository_manager_->LoadUnloadModel( - {{model_name, {}}}, action_type, unload_dependents); -} - -Status -InferenceServer::PrintBackendAndModelSummary() -{ - // Repository Agents Summary - std::vector repoagent_headers; - repoagent_headers.emplace_back("Repository Agent"); - repoagent_headers.emplace_back("Path"); - - triton::common::TablePrinter repoagents_table(repoagent_headers); - - std::unique_ptr> repoagent_state; - RETURN_IF_ERROR(TritonRepoAgentManager::AgentState(&repoagent_state)); - - for (const auto& repoagent_pair : *repoagent_state) { - std::vector repoagent_record; - repoagent_record.emplace_back(repoagent_pair.first); - repoagent_record.emplace_back(repoagent_pair.second); - repoagents_table.InsertRow(repoagent_record); - } - std::string repoagents_table_string = repoagents_table.PrintTable(); - LOG_INFO << repoagents_table_string; - - // Backends Summary - std::vector backend_headers; - backend_headers.emplace_back("Backend"); - backend_headers.emplace_back("Path"); - backend_headers.emplace_back("Config"); - - triton::common::TablePrinter backends_table(backend_headers); - - std::unique_ptr>> - backend_state; - RETURN_IF_ERROR(backend_manager_->BackendState(&backend_state)); - - for (const auto& backend_pair : *backend_state) { - 
std::vector backend_record; - - // Backend Name - backend_record.emplace_back(backend_pair.first); - - // Backend config and lib path - for (const auto& backend_field : backend_pair.second) { - backend_record.emplace_back(backend_field); - } - backends_table.InsertRow(backend_record); - } - std::string backends_table_string = backends_table.PrintTable(); - LOG_INFO << backends_table_string; - - // Models Summary - auto model_states = model_repository_manager_->ModelStates(); - - std::vector model_headers; - model_headers.emplace_back("Model"); - model_headers.emplace_back("Version"); - model_headers.emplace_back("Status"); - - triton::common::TablePrinter models_table(model_headers); - - for (const auto& model_state : model_states) { - auto model_version_map = model_state.second; - std::string model_name = model_state.first; - - // If model_version_map size is zero, no version is found for this model - if (model_version_map.size() == 0) { - std::vector model_record; - model_record.emplace_back(model_name); - model_record.emplace_back("-"); - model_record.emplace_back("Not loaded: No model version was found"); - models_table.InsertRow(model_record); - } else { - for (const auto& model_map : model_version_map) { - std::vector model_record; - std::string model_version = std::to_string(model_map.first); - auto model_status_pair = model_map.second; - std::string model_status = - ModelReadyStateString(model_status_pair.first); - - if (model_status_pair.second != "") { - model_status += ": " + model_status_pair.second; - } - - model_record.emplace_back(model_name); - model_record.emplace_back(model_version); - model_record.emplace_back(model_status); - models_table.InsertRow(model_record); - } - } - } - std::string models_table_string = models_table.PrintTable(); - LOG_INFO << models_table_string; - - return Status::Success; -} - -Status -InferenceServer::RegisterModelRepository( - const std::string& repository, - const std::unordered_map& model_mapping) -{ - return model_repository_manager_->RegisterModelRepository( - repository, model_mapping); -} - -Status -InferenceServer::UnregisterModelRepository(const std::string& repository) -{ - return model_repository_manager_->UnregisterModelRepository(repository); -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/server.h b/3rdparty/core-r22.12/src/server.h deleted file mode 100644 index f1e3dab0af3f1cd9c767b304c6b475bb03d48300..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/server.h +++ /dev/null @@ -1,326 +0,0 @@ -// Copyright 2018-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "backend_manager.h" -#include "infer_parameter.h" -#include "model_config.pb.h" -#include "model_repository_manager.h" -#include "rate_limiter.h" -#include "response_cache.h" -#include "status.h" -#include "triton/common/model_config.h" - -namespace triton { namespace core { - -class Model; -class InferenceRequest; - -enum class ModelControlMode { MODE_NONE, MODE_POLL, MODE_EXPLICIT }; - -enum class RateLimitMode { RL_EXEC_COUNT, RL_OFF }; - -// Readiness status for the inference server. -enum class ServerReadyState { - // The server is in an invalid state and will likely not response - // correctly to any requests. - SERVER_INVALID, - - // The server is initializing. - SERVER_INITIALIZING, - - // The server is ready and accepting requests. - SERVER_READY, - - // The server is exiting and will not respond to requests. - SERVER_EXITING, - - // The server did not initialize correctly. - SERVER_FAILED_TO_INITIALIZE -}; - -// Inference server information. -class InferenceServer { - public: - // Construct an inference server. - InferenceServer(); - - // Initialize the server. Return true on success, false otherwise. - Status Init(); - - // Stop the server. Return true if all models are unloaded, false - // if exit timeout occurs. If 'force' is true attempt to stop the - // server even if it is not in a ready state. - Status Stop(const bool force = false); - - // Check the model repository for changes and update server state - // based on those changes. - Status PollModelRepository(); - - // Server health - Status IsLive(bool* live); - Status IsReady(bool* ready); - - // Model health - Status ModelIsReady( - const std::string& model_name, const int64_t model_version, bool* ready); - - // Return the ready versions of specific model - Status ModelReadyVersions( - const std::string& model_name, std::vector* versions); - - // Return the ready versions of all models - Status ModelReadyVersions( - std::map>* model_versions); - - /// Get the index of all models in all repositories. - /// \param ready_only If true return only index of models that are ready. - /// \param index Returns the index. - /// \return error status. - Status RepositoryIndex( - const bool ready_only, - std::vector* index); - - // Inference. If Status::Success is returned then this function has - // taken ownership of the request object and so 'request' will be - // nullptr. If non-success is returned then the caller still retains - // ownership of 'request'. - Status InferAsync(std::unique_ptr& request); - - // Load the corresponding model. Reload the model if it has been loaded. 
- Status LoadModel( - const std::unordered_map< - std::string, std::vector>& models); - - // Unload the corresponding model. - Status UnloadModel( - const std::string& model_name, const bool unload_dependents); - - // Print backends and models summary - Status PrintBackendAndModelSummary(); - - // Register model repository path and associated mappings - Status RegisterModelRepository( - const std::string& repository, - const std::unordered_map& model_mapping); - - // Unregister model repository path. - Status UnregisterModelRepository(const std::string& repository); - - // Return the server version. - const std::string& Version() const { return version_; } - - // Return the server extensions. - const std::vector& Extensions() const { return extensions_; } - - // Get / set the ID of the server. - const std::string& Id() const { return id_; } - void SetId(const std::string& id) { id_ = id; } - - // Get / set the model repository path - const std::set& ModelRepositoryPaths() const - { - return model_repository_paths_; - } - - void SetModelRepositoryPaths(const std::set& p) - { - model_repository_paths_ = p; - } - - // Get / set model control mode. - ModelControlMode GetModelControlMode() const { return model_control_mode_; } - void SetModelControlMode(ModelControlMode m) { model_control_mode_ = m; } - - // Get / set the startup models - const std::set& StartupModels() const { return startup_models_; } - void SetStartupModels(const std::set& m) { startup_models_ = m; } - - // Get / set strict model configuration enable. - bool StrictModelConfigEnabled() const { return strict_model_config_; } - void SetStrictModelConfigEnabled(bool e) { strict_model_config_ = e; } - - // Get / set rate limiter mode. - RateLimitMode RateLimiterMode() const { return rate_limit_mode_; } - void SetRateLimiterMode(RateLimitMode m) { rate_limit_mode_ = m; } - - // Get / set rate limit resource counts - const RateLimiter::ResourceMap& RateLimiterResources() const - { - return rate_limit_resource_map_; - } - void SetRateLimiterResources(const RateLimiter::ResourceMap& rm) - { - rate_limit_resource_map_ = rm; - } - - // Get / set the pinned memory pool byte size. - int64_t PinnedMemoryPoolByteSize() const { return pinned_memory_pool_size_; } - void SetPinnedMemoryPoolByteSize(int64_t s) - { - pinned_memory_pool_size_ = std::max((int64_t)0, s); - } - - // Get / set the response cache byte size. - uint64_t ResponseCacheByteSize() const { return response_cache_byte_size_; } - void SetResponseCacheByteSize(uint64_t s) - { - response_cache_byte_size_ = s; - response_cache_enabled_ = (s > 0) ? true : false; - } - - bool ResponseCacheEnabled() const { return response_cache_enabled_; } - - // Get / set CUDA memory pool size - const std::map& CudaMemoryPoolByteSize() const - { - return cuda_memory_pool_size_; - } - - void SetCudaMemoryPoolByteSize(const std::map& s) - { - cuda_memory_pool_size_ = s; - } - - // Get / set the minimum support CUDA compute capability. - double MinSupportedComputeCapability() const - { - return min_supported_compute_capability_; - } - void SetMinSupportedComputeCapability(double c) - { - min_supported_compute_capability_ = c; - } - - // Get / set strict readiness enable. - bool StrictReadinessEnabled() const { return strict_readiness_; } - void SetStrictReadinessEnabled(bool e) { strict_readiness_ = e; } - - // Get / set the server exit timeout, in seconds. 
- int32_t ExitTimeoutSeconds() const { return exit_timeout_secs_; } - void SetExitTimeoutSeconds(int32_t s) { exit_timeout_secs_ = std::max(0, s); } - - void SetBufferManagerThreadCount(unsigned int c) - { - buffer_manager_thread_count_ = c; - } - - void SetModelLoadThreadCount(unsigned int c) { model_load_thread_count_ = c; } - - // Set a backend command-line configuration - void SetBackendCmdlineConfig( - const triton::common::BackendCmdlineConfigMap& bc) - { - backend_cmdline_config_map_ = bc; - } - - void SetHostPolicyCmdlineConfig( - const triton::common::HostPolicyCmdlineConfigMap& hp) - { - host_policy_map_ = hp; - } - - void SetRepoAgentDir(const std::string& d) { repoagent_dir_ = d; } - - // Return the requested model object. - Status GetModel( - const std::string& model_name, const int64_t model_version, - std::shared_ptr* model) - { - // Allow model retrival while server exiting to provide graceful - // completion of inference sequence that spans multiple requests. - if ((ready_state_ != ServerReadyState::SERVER_READY) && - (ready_state_ != ServerReadyState::SERVER_EXITING)) { - return Status(Status::Code::UNAVAILABLE, "Server not ready"); - } - return model_repository_manager_->GetModel( - model_name, model_version, model); - } - - // Get the Backend Manager - const std::shared_ptr& BackendManager() - { - return backend_manager_; - } - - // Return the pointer to RateLimiter object. - std::shared_ptr GetRateLimiter() { return rate_limiter_; } - - // Return the pointer to response cache object. - std::shared_ptr GetResponseCache() - { - return response_cache_; - } - - private: - const std::string version_; - std::string id_; - std::vector extensions_; - - std::set model_repository_paths_; - std::set startup_models_; - ModelControlMode model_control_mode_; - bool strict_model_config_; - bool strict_readiness_; - uint32_t exit_timeout_secs_; - uint32_t buffer_manager_thread_count_; - uint32_t model_load_thread_count_; - uint64_t pinned_memory_pool_size_; - uint64_t response_cache_byte_size_; - bool response_cache_enabled_; - std::map cuda_memory_pool_size_; - double min_supported_compute_capability_; - triton::common::BackendCmdlineConfigMap backend_cmdline_config_map_; - triton::common::HostPolicyCmdlineConfigMap host_policy_map_; - std::string repoagent_dir_; - RateLimitMode rate_limit_mode_; - RateLimiter::ResourceMap rate_limit_resource_map_; - - - // Current state of the inference server. - ServerReadyState ready_state_; - - // Number of in-flight, non-inference requests. During shutdown we - // attempt to wait for all in-flight non-inference requests to - // complete before exiting (also wait for in-flight inference - // requests but that is determined by model shared_ptr). - std::atomic inflight_request_counter_; - - std::shared_ptr rate_limiter_; - std::unique_ptr model_repository_manager_; - std::shared_ptr backend_manager_; - std::shared_ptr response_cache_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/server_message.h b/3rdparty/core-r22.12/src/server_message.h deleted file mode 100644 index ae5d0668e69fdaadc573cf8340a80e047d6914c8..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/server_message.h +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "status.h" - -#define TRITONJSON_STATUSTYPE triton::core::Status -#define TRITONJSON_STATUSRETURN(M) \ - return triton::core::Status(triton::core::Status::Code::INTERNAL, (M)) -#define TRITONJSON_STATUSSUCCESS triton::core::Status::Success -#include "triton/common/triton_json.h" - -namespace triton { namespace core { - -// -// Implementation for TRITONSERVER_Message. -// -class TritonServerMessage { - public: - TritonServerMessage(const triton::common::TritonJson::Value& msg) - { - json_buffer_.Clear(); - msg.Write(&json_buffer_); - base_ = json_buffer_.Base(); - byte_size_ = json_buffer_.Size(); - from_json_ = true; - } - - TritonServerMessage(std::string&& msg) - { - str_buffer_ = std::move(msg); - base_ = str_buffer_.data(); - byte_size_ = str_buffer_.size(); - from_json_ = false; - } - - TritonServerMessage(const TritonServerMessage& rhs) - { - from_json_ = rhs.from_json_; - if (from_json_) { - json_buffer_ = rhs.json_buffer_; - base_ = json_buffer_.Base(); - byte_size_ = json_buffer_.Size(); - } else { - str_buffer_ = rhs.str_buffer_; - base_ = str_buffer_.data(); - byte_size_ = str_buffer_.size(); - } - } - - void Serialize(const char** base, size_t* byte_size) const - { - *base = base_; - *byte_size = byte_size_; - } - - private: - bool from_json_; - triton::common::TritonJson::WriteBuffer json_buffer_; - std::string str_buffer_; - - const char* base_; - size_t byte_size_; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/shared_library.cc b/3rdparty/core-r22.12/src/shared_library.cc deleted file mode 100644 index 2bf00b15e74eaa66f0c9f472a67be355ef1d93db..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/shared_library.cc +++ /dev/null @@ -1,231 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "shared_library.h" - -#include "filesystem.h" -#include "mutex" -#include "triton/common/logging.h" - -#ifdef _WIN32 -// suppress the min and max definitions in Windef.h. -#define NOMINMAX -#include -#else -#include -#endif - -namespace triton { namespace core { - -static std::mutex mu_; - -Status -SharedLibrary::Acquire(std::unique_ptr* slib) -{ - mu_.lock(); - slib->reset(new SharedLibrary()); - return Status::Success; -} - -SharedLibrary::~SharedLibrary() -{ - mu_.unlock(); -} - -Status -SharedLibrary::SetLibraryDirectory(const std::string& path) -{ -#ifdef _WIN32 - LOG_VERBOSE(1) << "SetLibraryDirectory: path = " << path; - if (!SetDllDirectory(path.c_str())) { - LPSTR err_buffer = nullptr; - size_t size = FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&err_buffer, 0, NULL); - std::string errstr(err_buffer, size); - LocalFree(err_buffer); - - return Status( - Status::Code::NOT_FOUND, - "unable to set dll path " + path + ": " + errstr); - } -#endif - - return Status::Success; -} - -Status -SharedLibrary::ResetLibraryDirectory() -{ -#ifdef _WIN32 - LOG_VERBOSE(1) << "ResetLibraryDirectory"; - if (!SetDllDirectory(NULL)) { - LPSTR err_buffer = nullptr; - size_t size = FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&err_buffer, 0, NULL); - std::string errstr(err_buffer, size); - LocalFree(err_buffer); - - return Status( - Status::Code::NOT_FOUND, "unable to reset dll path: " + errstr); - } -#endif - - return Status::Success; -} - -Status -SharedLibrary::OpenLibraryHandle(const std::string& path, void** handle) -{ - LOG_VERBOSE(1) << "OpenLibraryHandle: " << path; - -#ifdef _WIN32 - // Need to put shared library directory on the DLL path so that any - // dependencies of the shared library are found - const std::string library_dir 
= DirName(path); - RETURN_IF_ERROR(SetLibraryDirectory(library_dir)); - - // HMODULE is typedef of void* - // https://docs.microsoft.com/en-us/windows/win32/winprog/windows-data-types - LOG_VERBOSE(1) << "OpenLibraryHandle: path = " << path; - *handle = LoadLibrary(path.c_str()); - - // Remove the dll path added above... do this unconditionally before - // check for failure in dll load. - RETURN_IF_ERROR(ResetLibraryDirectory()); - - if (*handle == nullptr) { - LPSTR err_buffer = nullptr; - size_t size = FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&err_buffer, 0, NULL); - std::string errstr(err_buffer, size); - LocalFree(err_buffer); - - return Status( - Status::Code::NOT_FOUND, "unable to load shared library: " + errstr); - } -#else - *handle = dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL); - if (*handle == nullptr) { - return Status( - Status::Code::NOT_FOUND, - "unable to load shared library: " + std::string(dlerror())); - } -#endif - - return Status::Success; -} - -Status -SharedLibrary::CloseLibraryHandle(void* handle) -{ - if (handle != nullptr) { -#ifdef _WIN32 - if (FreeLibrary((HMODULE)handle) == 0) { - LPSTR err_buffer = nullptr; - size_t size = FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&err_buffer, 0, NULL); - std::string errstr(err_buffer, size); - LocalFree(err_buffer); - return Status( - Status::Code::INTERNAL, "unable to unload shared library: " + errstr); - } -#else - if (dlclose(handle) != 0) { - return Status( - Status::Code::INTERNAL, - "unable to unload shared library: " + std::string(dlerror())); - } -#endif - } - - return Status::Success; -} - -Status -SharedLibrary::GetEntrypoint( - void* handle, const std::string& name, const bool optional, void** befn) -{ - *befn = nullptr; - -#ifdef _WIN32 - void* fn = GetProcAddress((HMODULE)handle, name.c_str()); - if ((fn == nullptr) && !optional) { - LPSTR err_buffer = nullptr; - size_t size = FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), - (LPSTR)&err_buffer, 0, NULL); - std::string errstr(err_buffer, size); - LocalFree(err_buffer); - return Status( - Status::Code::NOT_FOUND, - "unable to find '" + name + - "' entrypoint in custom library: " + errstr); - } -#else - dlerror(); - void* fn = dlsym(handle, name.c_str()); - const char* dlsym_error = dlerror(); - if (dlsym_error != nullptr) { - if (optional) { - return Status::Success; - } - - std::string errstr(dlsym_error); // need copy as dlclose overwrites - return Status( - Status::Code::NOT_FOUND, "unable to find required entrypoint '" + name + - "' in shared library: " + errstr); - } - - if (fn == nullptr) { - if (optional) { - return Status::Success; - } - - return Status( - Status::Code::NOT_FOUND, - "unable to find required entrypoint '" + name + "' in shared library"); - } -#endif - - *befn = fn; - return Status::Success; -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/shared_library.h b/3rdparty/core-r22.12/src/shared_library.h deleted file mode 100644 index 8ab12f3a6b07e6fde21dc154bca2cf5870928cec..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/shared_library.h +++ /dev/null @@ -1,72 +0,0 @@ -// 
Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include -#include "constants.h" -#include "status.h" - -namespace triton { namespace core { - -// SharedLibrary -// -// Utility functions for shared libraries. Because some operations -// require serialization, this object cannot be directly constructed -// and must instead be accessed using Acquire(). -class SharedLibrary { - public: - // Acquire a SharedLibrary object exclusively. Any other attempts to - // concurrently acquire a SharedLibrary object will block. - // object. Ownership is released by destroying the SharedLibrary - // object. - static Status Acquire(std::unique_ptr* slib); - - ~SharedLibrary(); - - // Configuration so that dependent libraries will be searched for in - // 'path' during OpenLibraryHandle. - Status SetLibraryDirectory(const std::string& path); - - // Reset any configuration done by SetLibraryDirectory. - Status ResetLibraryDirectory(); - - // Open shared library and return generic handle. - Status OpenLibraryHandle(const std::string& path, void** handle); - - // Close shared library. - Status CloseLibraryHandle(void* handle); - - // Get a generic pointer for an entrypoint into a shared library. - Status GetEntrypoint( - void* handle, const std::string& name, const bool optional, void** befn); - - private: - DISALLOW_COPY_AND_ASSIGN(SharedLibrary); - explicit SharedLibrary() = default; -}; - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/status.cc b/3rdparty/core-r22.12/src/status.cc deleted file mode 100644 index 1640ee5ed08b34fa9af6bb29a27bb7b79258852a..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/status.cc +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "status.h" - -namespace triton { namespace core { - -const Status Status::Success(Status::Code::SUCCESS); - -Status::Code -TritonCodeToStatusCode(TRITONSERVER_Error_Code code) -{ - switch (code) { - case TRITONSERVER_ERROR_UNKNOWN: - return Status::Code::UNKNOWN; - case TRITONSERVER_ERROR_INTERNAL: - return Status::Code::INTERNAL; - case TRITONSERVER_ERROR_NOT_FOUND: - return Status::Code::NOT_FOUND; - case TRITONSERVER_ERROR_INVALID_ARG: - return Status::Code::INVALID_ARG; - case TRITONSERVER_ERROR_UNAVAILABLE: - return Status::Code::UNAVAILABLE; - case TRITONSERVER_ERROR_UNSUPPORTED: - return Status::Code::UNSUPPORTED; - case TRITONSERVER_ERROR_ALREADY_EXISTS: - return Status::Code::ALREADY_EXISTS; - - default: - break; - } - - return Status::Code::UNKNOWN; -} - -TRITONSERVER_Error_Code -StatusCodeToTritonCode(Status::Code status_code) -{ - switch (status_code) { - case Status::Code::UNKNOWN: - return TRITONSERVER_ERROR_UNKNOWN; - case Status::Code::INTERNAL: - return TRITONSERVER_ERROR_INTERNAL; - case Status::Code::NOT_FOUND: - return TRITONSERVER_ERROR_NOT_FOUND; - case Status::Code::INVALID_ARG: - return TRITONSERVER_ERROR_INVALID_ARG; - case Status::Code::UNAVAILABLE: - return TRITONSERVER_ERROR_UNAVAILABLE; - case Status::Code::UNSUPPORTED: - return TRITONSERVER_ERROR_UNSUPPORTED; - case Status::Code::ALREADY_EXISTS: - return TRITONSERVER_ERROR_ALREADY_EXISTS; - - default: - break; - } - - return TRITONSERVER_ERROR_UNKNOWN; -} - -Status -CommonErrorToStatus(const triton::common::Error& error) -{ - return Status(error); -} - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/status.h b/3rdparty/core-r22.12/src/status.h deleted file mode 100644 index 6efdf1522336c44a94d63c618e1be8dbf3091d4c..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/status.h +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
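A small sketch of how the two conversion helpers above bridge the C++ Status type and the TRITONSERVER_Error C type. It assumes only the status.h/status.cc definitions shown in this diff; the wrapper names are hypothetical.

```cpp
// Convert between core Status and the TRITONSERVER_Error C type.
#include "status.h"

namespace tc = triton::core;

tc::Status
ErrorToStatus(TRITONSERVER_Error* err)
{
  if (err == nullptr) {
    return tc::Status::Success;
  }
  // Map the C error code/message into a Status, then release the C object.
  tc::Status status(
      tc::TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err)),
      TRITONSERVER_ErrorMessage(err));
  TRITONSERVER_ErrorDelete(err);
  return status;
}

TRITONSERVER_Error*
StatusToError(const tc::Status& status)
{
  if (status.IsOk()) {
    return nullptr;  // nullptr signals success in the C API
  }
  return TRITONSERVER_ErrorNew(
      tc::StatusCodeToTritonCode(status.StatusCode()),
      status.Message().c_str());
}
```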
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include -#include "triton/common/error.h" -#include "tritonserver_apis.h" - -namespace triton { namespace core { - -class Status : public triton::common::Error { - public: - // Construct a status from a code with no message. - explicit Status(Code code = Code::SUCCESS) : Error(code) {} - - // Construct a status from a code and message. - explicit Status(Code code, const std::string& msg) : Error(code, msg) {} - - // Construct a status from a code and message. - explicit Status(const Error& error) : Error(error) {} - - // Convenience "success" value. Can be used as Error::Success to - // indicate no error. - static const Status Success; - - // Return the code for this status. - Code StatusCode() const { return code_; } -}; - -// Return the Status::Code corresponding to a -// TRITONSERVER_Error_Code. -Status::Code TritonCodeToStatusCode(TRITONSERVER_Error_Code code); - -// Return the TRITONSERVER_Error_Code corresponding to a -// Status::Code. -TRITONSERVER_Error_Code StatusCodeToTritonCode(Status::Code status_code); - -// Converts the common Error to Status object -Status CommonErrorToStatus(const triton::common::Error& error); - -// If status is non-OK, return the Status. -#define RETURN_IF_ERROR(S) \ - do { \ - const Status& status__ = (S); \ - if (!status__.IsOk()) { \ - return status__; \ - } \ - } while (false) - -// If TRITONSERVER error is non-OK, return the corresponding status. -#define RETURN_IF_TRITONSERVER_ERROR(E) \ - do { \ - TRITONSERVER_Error* err__ = (E); \ - if (err__ != nullptr) { \ - Status status__ = Status( \ - TritonCodeToStatusCode(TRITONSERVER_ErrorCode(err__)), \ - TRITONSERVER_ErrorMessage(err__)); \ - TRITONSERVER_ErrorDelete(err__); \ - return status__; \ - } \ - } while (false) - -// If status is non-OK, return the corresponding TRITONSERVER_Error. 
-#define RETURN_TRITONSERVER_ERROR_IF_ERROR(S) \ - do { \ - const Status& status__ = (S); \ - if (!status__.IsOk()) { \ - return TRITONSERVER_ErrorNew( \ - StatusCodeToTritonCode(status__.StatusCode()), \ - status__.Message().c_str()); \ - } \ - } while (false) - -}} // namespace triton::core diff --git a/3rdparty/core-r22.12/src/test/async_work_queue_test.cc b/3rdparty/core-r22.12/src/test/async_work_queue_test.cc deleted file mode 100644 index 488e1daf1ace2c812becca55bc5e8f41938b9e40..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/async_work_queue_test.cc +++ /dev/null @@ -1,245 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
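The two macros above are meant to be chained: internal functions propagate Status and bail out early on failure, while the C-API boundary converts a non-OK Status back into a TRITONSERVER_Error. A minimal sketch with hypothetical helper names:

```cpp
// Sketch of the typical RETURN_IF_ERROR / RETURN_TRITONSERVER_ERROR_IF_ERROR flow.
#include <cstddef>

#include "status.h"

namespace tc = triton::core;

static tc::Status
CheckByteSize(size_t byte_size)
{
  if (byte_size == 0) {
    return tc::Status(tc::Status::Code::INVALID_ARG, "byte_size must be > 0");
  }
  return tc::Status::Success;
}

static tc::Status
DoWork(size_t byte_size)
{
  // Early-return on failure without writing the if/return by hand.
  RETURN_IF_ERROR(CheckByteSize(byte_size));
  return tc::Status::Success;
}

extern "C" TRITONSERVER_Error*
ExampleCApiEntry(size_t byte_size)
{
  // At the C API boundary a non-OK Status becomes a TRITONSERVER_Error.
  RETURN_TRITONSERVER_ERROR_IF_ERROR(DoWork(byte_size));
  return nullptr;  // success
}
```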
-#include "gtest/gtest.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include "triton/common/async_work_queue.h" - -namespace tc = triton::common; - -namespace { - -// Wrapper of AsyncWorkQueue class to expose Reset() for unit testing -class TestingAsyncWorkQueue : public tc::AsyncWorkQueue { - public: - static void Reset() { AsyncWorkQueue::Reset(); } -}; - -class AsyncWorkQueueTest : public ::testing::Test { - protected: - void TearDown() override { TestingAsyncWorkQueue::Reset(); } -}; - -TEST_F(AsyncWorkQueueTest, InitZeroWorker) -{ - auto error = tc::AsyncWorkQueue::Initialize(0); - EXPECT_FALSE(error.IsOk()) << "Expect error when initialized with 0 worker"; -} - -TEST_F(AsyncWorkQueueTest, InitOneWorker) -{ - auto error = tc::AsyncWorkQueue::Initialize(1); - EXPECT_TRUE(error.IsOk()) << error.Message(); -} - -TEST_F(AsyncWorkQueueTest, InitFourWorker) -{ - auto error = tc::AsyncWorkQueue::Initialize(4); - EXPECT_TRUE(error.IsOk()) << error.Message(); -} - -TEST_F(AsyncWorkQueueTest, InitTwice) -{ - auto error = tc::AsyncWorkQueue::Initialize(4); - EXPECT_TRUE(error.IsOk()) << error.Message(); - error = tc::AsyncWorkQueue::Initialize(2); - EXPECT_FALSE(error.IsOk()) << "Expect error from initializing twice"; -} - -TEST_F(AsyncWorkQueueTest, WorkerCountUninitialized) -{ - EXPECT_EQ(tc::AsyncWorkQueue::WorkerCount(), (size_t)0) - << "Expect 0 worker count for uninitialized queue"; -} - -TEST_F(AsyncWorkQueueTest, WorkerCountInitialized) -{ - auto error = tc::AsyncWorkQueue::Initialize(4); - EXPECT_TRUE(error.IsOk()) << error.Message(); - EXPECT_EQ(tc::AsyncWorkQueue::WorkerCount(), (size_t)4) - << "Expect 4 worker count for initialized queue"; -} - - -TEST_F(AsyncWorkQueueTest, RunTasksInParallel) -{ - auto AddTwoFn = [](const std::vector& lhs, const std::vector& rhs, - std::promise>* res) { - std::vector lres; - lres.reserve(lhs.size()); - for (size_t idx = 0; idx < lhs.size(); idx++) { - lres.push_back(lhs[idx] + rhs[idx]); - } - res->set_value(lres); - }; - - size_t task_count = 8; - std::vector> operands; - std::vector> expected_results; - { - // Use large element count to reduce the async work queue overhead - size_t element_count = 1 << 24; - auto RandHalfIntFn = std::bind( - std::uniform_int_distribution<>{std::numeric_limits::min() / 2, - std::numeric_limits::max() / 2}, - std::default_random_engine{}); - for (size_t tc = 0; tc < task_count + 1; tc++) { - expected_results.push_back(std::vector()); - operands.push_back(std::vector()); - operands.back().reserve(element_count); - for (size_t ec = 0; ec < element_count; ec++) { - operands.back().push_back(RandHalfIntFn()); - } - } - } - - // Get serialized time as baseline and store expected results - uint64_t serialized_duration = 0; - { - std::vector>> res(task_count); - - auto start_ts = - std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - - for (size_t count = 0; count < task_count; count++) { - AddTwoFn(operands[count], operands[count + 1], &res[count]); - } - - auto end_ts = - std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - - for (size_t count = 0; count < task_count; count++) { - expected_results[count] = std::move(res[count].get_future().get()); - } - serialized_duration = end_ts - start_ts; - } - - auto error = tc::AsyncWorkQueue::Initialize(4); - ASSERT_TRUE(error.IsOk()) << error.Message(); - - uint64_t parallelized_duration = 0; - { - std::vector>> ps(task_count); - 
std::vector>> fs; - for (auto& p : ps) { - fs.emplace_back(std::move(p.get_future())); - } - - auto start_ts = - std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - - for (size_t count = 0; count < task_count; count++) { - tc::AsyncWorkQueue::AddTask([&AddTwoFn, &operands, &ps, count]() mutable { - AddTwoFn(operands[count], operands[count + 1], &ps[count]); - }); - } - for (size_t count = 0; count < task_count; count++) { - fs[count].wait(); - } - - auto end_ts = - std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count(); - - parallelized_duration = end_ts - start_ts; - // FIXME manual testing shows parallelized time is between 30% to 33.3% for - // 128 M total elements - EXPECT_LT(parallelized_duration, serialized_duration / 3) - << "Expected parallelized work was completed within 1/3 of serialized " - "time"; - for (size_t count = 0; count < task_count; count++) { - auto res = std::move(fs[count].get()); - EXPECT_EQ(res, expected_results[count]) - << "Mismatched parallelized result"; - } - } -} - -TEST_F(AsyncWorkQueueTest, RunTasksFIFO) -{ - auto CaptureTimestampFn = [](std::promise* res) { - res->set_value( - std::chrono::duration_cast( - std::chrono::high_resolution_clock::now().time_since_epoch()) - .count()); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - }; - - size_t task_count = 8; - std::vector> ps(task_count); - - auto error = tc::AsyncWorkQueue::Initialize(2); - ASSERT_TRUE(error.IsOk()) << error.Message(); - - std::vector> barrier(2); - tc::AsyncWorkQueue::AddTask([&barrier]() mutable { - barrier[0].get_future().get(); - std::this_thread::sleep_for(std::chrono::milliseconds(100)); - }); - tc::AsyncWorkQueue::AddTask([&barrier]() mutable { - barrier[1].get_future().get(); - std::this_thread::sleep_for(std::chrono::milliseconds(200)); - }); - for (size_t count = 0; count < task_count; count++) { - tc::AsyncWorkQueue::AddTask([count, &CaptureTimestampFn, &ps]() mutable { - CaptureTimestampFn(&ps[count]); - }); - } - - // Signal to start the work - barrier[0].set_value(); - barrier[1].set_value(); - - uint64_t prev_ts = 0; - for (size_t count = 0; count < task_count; count++) { - uint64_t curr_ts = ps[count].get_future().get(); - EXPECT_LT(prev_ts, curr_ts) - << "Expected async work is processed in FIFO order"; - } -} - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/test/memory_test.cc b/3rdparty/core-r22.12/src/test/memory_test.cc deleted file mode 100644 index 6bd3b6293949425bcac6690899473488764645b9..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/memory_test.cc +++ /dev/null @@ -1,402 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. 
-// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "gtest/gtest.h" - -#include -#include "cuda_memory_manager.h" -#include "cuda_utils.h" -#include "memory.h" -#include "pinned_memory_manager.h" - -namespace tc = triton::core; - -namespace { - -#define CHECK_POINTER_ATTRIBUTES(ptr__, type__, device__) \ - do { \ - cudaPointerAttributes attr; \ - auto cuerr = cudaPointerGetAttributes(&attr, ptr__); \ - ASSERT_TRUE(cuerr == cudaSuccess) \ - << "Failed to get CUDA pointer attributes: " \ - << cudaGetErrorString(cuerr); \ - EXPECT_TRUE(attr.type == type__) \ - << "Expect pointer with type " << type__ << ", got: " << attr.type; \ - if (attr.type == cudaMemoryTypeDevice) { \ - EXPECT_TRUE(attr.device == device__) \ - << "Expect allocation on CUDA device " << device__ \ - << ", got: " << attr.device; \ - } \ - } while (false) - -// Wrapper of CudaMemoryManager class to expose Reset() for unit testing -class TestingCudaMemoryManager : public tc::CudaMemoryManager { - public: - static void Reset() { CudaMemoryManager::Reset(); } -}; - -class CudaMemoryManagerTest : public ::testing::Test { - protected: - void SetUp() override - { - // Default memory manager options - options_.min_supported_compute_capability_ = 6.0; - options_.memory_pool_byte_size_ = {{0, 1 << 10}}; - } - - void TearDown() override { TestingCudaMemoryManager::Reset(); } - - tc::CudaMemoryManager::Options options_; -}; - -TEST_F(CudaMemoryManagerTest, InitOOM) -{ - // Set to reserve too much memory - double cc = 6.0; - std::map s{{0, uint64_t(1) << 40 /* 1024 GB */}}; - const tc::CudaMemoryManager::Options options{cc, s}; - auto status = tc::CudaMemoryManager::Create(options); - EXPECT_FALSE(status.IsOk()) << "Expect creation error"; -} - -TEST_F(CudaMemoryManagerTest, InitSuccess) -{ - double cc = 6.0; - std::map s{{0, 1 << 10 /* 1024 bytes */}}; - const tc::CudaMemoryManager::Options options{cc, s}; - auto status = tc::CudaMemoryManager::Create(options); - EXPECT_TRUE(status.IsOk()) << status.Message(); -} - -TEST_F(CudaMemoryManagerTest, InitNoDeviceConfig) -{ - double cc = 6.0; - std::map s; - const tc::CudaMemoryManager::Options options{cc, s}; - auto status = tc::CudaMemoryManager::Create(options); - EXPECT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - status = tc::CudaMemoryManager::Alloc(&ptr, 1, 0); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; -} - -TEST_F(CudaMemoryManagerTest, InitZeroByte) -{ - double cc = 6.0; - std::map s{{0, 0}}; - const tc::CudaMemoryManager::Options options{cc, s}; - auto status = tc::CudaMemoryManager::Create(options); - 
EXPECT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - status = tc::CudaMemoryManager::Alloc(&ptr, 1, 0); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; -} - -TEST_F(CudaMemoryManagerTest, AllocSuccess) -{ - auto status = tc::CudaMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - status = tc::CudaMemoryManager::Alloc(&ptr, 1024, 0); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(ptr) << "Expect pointer to allocated buffer"; - // check if returned pointer is CUDA pointer - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeDevice, 0); -} - -TEST_F(CudaMemoryManagerTest, AllocFail) -{ - auto status = tc::CudaMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - status = tc::CudaMemoryManager::Alloc(&ptr, 2048, 0); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; -} - -TEST_F(CudaMemoryManagerTest, MultipleAlloc) -{ - auto status = tc::CudaMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - void* first_ptr = nullptr; - status = tc::CudaMemoryManager::Alloc(&first_ptr, 600, 0); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(first_ptr) << "Expect pointer to allocated buffer"; - // check if returned pointer is CUDA pointer - CHECK_POINTER_ATTRIBUTES(first_ptr, cudaMemoryTypeDevice, 0); - - // 512 + 600 > 1024 - void* second_ptr = nullptr; - status = tc::CudaMemoryManager::Alloc(&second_ptr, 512, 0); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; - - // Free the first pointer and retry the second one - status = tc::CudaMemoryManager::Free(first_ptr, 0); - EXPECT_TRUE(status.IsOk()) << status.Message(); - status = tc::CudaMemoryManager::Alloc(&second_ptr, 512, 0); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(second_ptr) << "Expect pointer to allocated buffer"; - // check if returned pointer is CUDA pointer - CHECK_POINTER_ATTRIBUTES(second_ptr, cudaMemoryTypeDevice, 0); -} - -TEST_F(CudaMemoryManagerTest, MultipleDevice) -{ - std::set supported_gpus; - auto status = tc::GetSupportedGPUs( - &supported_gpus, options_.min_supported_compute_capability_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_GE(supported_gpus.size(), size_t(2)) - << "Test requires at least two supported CUDA devices"; - - { - double cc = 6.0; - std::map s; - // Only enough memory is only reserved in one of the devices - s[*supported_gpus.begin()] = 32; - s[*(++supported_gpus.begin())] = 1024; - const tc::CudaMemoryManager::Options options{cc, s}; - status = tc::CudaMemoryManager::Create(options); - ASSERT_TRUE(status.IsOk()) << status.Message(); - } - - void* ptr = nullptr; - // Allocation on small device - int small_device = *supported_gpus.begin(); - status = tc::CudaMemoryManager::Alloc(&ptr, 1024, small_device); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; - - // Allocation on large device - int large_device = *(++supported_gpus.begin()); - status = tc::CudaMemoryManager::Alloc(&ptr, 1024, large_device); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(ptr) << "Expect pointer to allocated buffer"; - // check if returned pointer is CUDA pointer - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeDevice, large_device); - - // Free allocation ... 
- status = tc::CudaMemoryManager::Free(ptr, small_device); - EXPECT_FALSE(status.IsOk()) << "Unexpected deallocation on wrong device"; - status = tc::CudaMemoryManager::Free(ptr, large_device); - EXPECT_TRUE(status.IsOk()) << status.Message(); -} - -class AllocatedMemoryTest : public ::testing::Test { - protected: - // Per-test-suite set-up. - static void SetUpTestSuite() - { - // Pinned memory manager - { - tc::PinnedMemoryManager::Options options{1024}; - auto status = tc::PinnedMemoryManager::Create(options); - ASSERT_TRUE(status.IsOk()) << status.Message(); - } - } - - // Set up CUDA memory manager per test for special fallback case - void SetUp() override - { - tc::CudaMemoryManager::Options options{6.0, {{0, 1 << 10}}}; - auto status = tc::CudaMemoryManager::Create(options); - ASSERT_TRUE(status.IsOk()) << status.Message(); - } - - void TearDown() override { TestingCudaMemoryManager::Reset(); } -}; - -TEST_F(AllocatedMemoryTest, AllocGPU) -{ - size_t expect_size = 512, actual_size; - TRITONSERVER_MemoryType expect_type = TRITONSERVER_MEMORY_GPU, actual_type; - int64_t expect_id = 0, actual_id; - tc::AllocatedMemory memory(expect_size, expect_type, expect_id); - - auto ptr = memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(expect_type, actual_type) - << "Expect type: " << expect_type << ", got: " << actual_type; - EXPECT_EQ(expect_id, actual_id) - << "Expect id: " << expect_id << ", got: " << actual_id; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeDevice, expect_id); -} - -TEST_F(AllocatedMemoryTest, AllocPinned) -{ - size_t expect_size = 512, actual_size; - TRITONSERVER_MemoryType expect_type = TRITONSERVER_MEMORY_CPU_PINNED, - actual_type; - int64_t expect_id = 0, actual_id; - tc::AllocatedMemory memory(expect_size, expect_type, expect_id); - - auto ptr = memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(expect_type, actual_type) - << "Expect type: " << expect_type << ", got: " << actual_type; - EXPECT_EQ(expect_id, actual_id) - << "Expect id: " << expect_id << ", got: " << actual_id; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeHost, expect_id); -} - -TEST_F(AllocatedMemoryTest, AllocFallback) -{ - // Each allocation uses half of the target reserved memory - size_t expect_size = 600, actual_size; - TRITONSERVER_MemoryType expect_type = TRITONSERVER_MEMORY_GPU, actual_type; - int64_t expect_id = 0, actual_id; - - // First allocation - tc::AllocatedMemory cuda_memory(expect_size, expect_type, expect_id); - - auto ptr = cuda_memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(expect_type, actual_type) - << "Expect type: " << expect_type << ", got: " << actual_type; - EXPECT_EQ(expect_id, actual_id) - << "Expect id: " << expect_id << ", got: " << actual_id; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeDevice, expect_id); - - // Second allocation, should trigger fallback from CUDA -> pinned memory - tc::AllocatedMemory pinned_memory(expect_size, expect_type, expect_id); - - ptr = pinned_memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << 
"Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(TRITONSERVER_MEMORY_CPU_PINNED, actual_type) - << "Expect type: " << TRITONSERVER_MEMORY_CPU_PINNED - << ", got: " << actual_type; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeHost, expect_id); - - // Third allocation, CUDA -> pinned -> non-pinned - tc::AllocatedMemory system_memory(expect_size, expect_type, expect_id); - - ptr = system_memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(TRITONSERVER_MEMORY_CPU, actual_type) - << "Expect type: " << TRITONSERVER_MEMORY_CPU_PINNED - << ", got: " << actual_type; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeUnregistered, expect_id); -} - -TEST_F(AllocatedMemoryTest, AllocFallbackNoCuda) -{ - // Test fallback in the case where CUDA memory manager is not properly created - TestingCudaMemoryManager::Reset(); - - size_t expect_size = 600, actual_size; - TRITONSERVER_MemoryType expect_type = TRITONSERVER_MEMORY_GPU, actual_type; - int64_t expect_id = 0, actual_id; - - // CUDA memory allocation should trigger fallback to allocate pinned memory - tc::AllocatedMemory pinned_memory(expect_size, expect_type, expect_id); - - auto ptr = pinned_memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(TRITONSERVER_MEMORY_CPU_PINNED, actual_type) - << "Expect type: " << TRITONSERVER_MEMORY_CPU_PINNED - << ", got: " << actual_type; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeHost, expect_id); -} - -TEST_F(AllocatedMemoryTest, Release) -{ - // Similar to above, but verify that the memory will be released once - // out of scope - // Each allocation uses half of the target reserved memory - size_t expect_size = 600, actual_size; - TRITONSERVER_MemoryType expect_type = TRITONSERVER_MEMORY_GPU, actual_type; - int64_t expect_id = 0, actual_id; - - { - // First allocation - tc::AllocatedMemory cuda_memory(expect_size, expect_type, expect_id); - - auto ptr = cuda_memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(expect_type, actual_type) - << "Expect type: " << expect_type << ", got: " << actual_type; - EXPECT_EQ(expect_id, actual_id) - << "Expect id: " << expect_id << ", got: " << actual_id; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeDevice, expect_id); - - // Second allocation, should trigger fallback from CUDA -> pinned memory - tc::AllocatedMemory pinned_memory(expect_size, expect_type, expect_id); - - ptr = pinned_memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) - << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(TRITONSERVER_MEMORY_CPU_PINNED, actual_type) - << "Expect type: " << TRITONSERVER_MEMORY_CPU_PINNED - << ", got: " << actual_type; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeHost, expect_id); - } - - // Third allocation, should not trigger fallback - tc::AllocatedMemory memory(expect_size, expect_type, expect_id); - - auto ptr = memory.BufferAt(0, &actual_size, &actual_type, &actual_id); - EXPECT_EQ(expect_size, actual_size) 
- << "Expect size: " << expect_size << ", got: " << actual_size; - EXPECT_EQ(expect_type, actual_type) - << "Expect type: " << expect_type << ", got: " << actual_type; - - // Sanity check on the pointer property - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeDevice, expect_id); -} - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/test/metrics_api_test.cc b/3rdparty/core-r22.12/src/test/metrics_api_test.cc deleted file mode 100644 index f99595a6cd9c8e20fcd8205addcaf9a1b132021f..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/metrics_api_test.cc +++ /dev/null @@ -1,678 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#ifdef TRITON_ENABLE_METRICS - -#include -#include -#include "gmock/gmock.h" -#include "gtest/gtest.h" -#include "metric_family.h" -#include "triton/common/logging.h" -#include "triton/core/tritonserver.h" - -namespace tc = triton::core; - -namespace { - -using ::testing::HasSubstr; - -#define FAIL_TEST_IF_ERR(X, MSG) \ - do { \ - std::shared_ptr err__((X), TRITONSERVER_ErrorDelete); \ - ASSERT_TRUE((err__ == nullptr)) \ - << "error: " << (MSG) << ": " \ - << TRITONSERVER_ErrorCodeString(err__.get()) << " - " \ - << TRITONSERVER_ErrorMessage(err__.get()); \ - } while (false) - -/* Helpers */ - -// Get serialized metrics string from C API -void -GetMetrics(TRITONSERVER_Server* server, std::string* metrics_str) -{ - // Check metrics via C API - ASSERT_NE(server, nullptr); - TRITONSERVER_Metrics* metrics = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerMetrics(server, &metrics), "fetch metrics"); - const char* base; - size_t byte_size; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricsFormatted( - metrics, TRITONSERVER_METRIC_PROMETHEUS, &base, &byte_size), - "format metrics string"); - *metrics_str = std::string(base, byte_size); - TRITONSERVER_MetricsDelete(metrics); -} - -// Count number of times substr appears in s -int -CountMatches(const std::string s, const std::string substr) -{ - int num_matches = 0; - std::string::size_type pos = 0; - while ((pos = s.find(substr, pos)) != std::string::npos) { - num_matches++; - pos += substr.length(); - } - return num_matches; -} - -int -NumMetricMatches(TRITONSERVER_Server* server, const std::string substr) -{ - std::string metrics_str; - GetMetrics(server, &metrics_str); - const int num_matches = CountMatches(metrics_str, substr); - return num_matches; -} - -// Add two metrics with the same labels from the same metric family -// and verify they refer to the same metric/value -void -DupeMetricHelper( - TRITONSERVER_Server* server, - std::vector labels) -{ - // Create metric family - TRITONSERVER_MetricFamily* family = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER; - const char* name = "dupe_metric_test"; - const char* description = "dupe metric description"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family1"); - - // Create metric - TRITONSERVER_Metric* metric1 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric1, family, labels.data(), labels.size()), - "Creating new metric"); - - // Create duplicate metric - TRITONSERVER_Metric* metric2 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric2, family, labels.data(), labels.size()), - "Creating new metric"); - - // Verify dupe metrics reference same underlying metric - double value1 = -1; - double value2 = -1; - double inc = 7.5; - - // Verify initial values of zero - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric1, &value1), - "query metric value after increment"); - ASSERT_EQ(value1, 0); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric2, &value2), - "query metric value after increment"); - ASSERT_EQ(value2, 0); - - // Increment metric 1, check metric 2 == metric 1 - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricIncrement(metric1, inc), "increase metric value"); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric1, &value1), - "query metric value after increment"); - ASSERT_EQ(value1, inc); - - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric2, &value2), - "query metric value after increment"); - ASSERT_EQ(value1, value2); - std::cout << "metric1 value: " << value1 << " == 
metric2 value: " << value2 - << std::endl; - - // Assert custom metric/family remains when there's still a reference to it - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric1), "delete metric1"); - ASSERT_EQ(NumMetricMatches(server, description), 1); - - // Assert custom metric/family not displayed after all metrics are deleted - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric2), "delete metric2"); - ASSERT_EQ(NumMetricMatches(server, description), 0); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family), "delete family"); -} - -void -MetricAPIHelper(TRITONSERVER_Metric* metric, TRITONSERVER_MetricKind kind) -{ - double value = -1; - double prev_value = -1; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric, &value), "query metric initial value"); - // Value should be zero initially - ASSERT_EQ(value, 0.0); - - // Increment positively - double increment = 1729.0; - prev_value = value; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricIncrement(metric, increment), "increase metric value"); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric, &value), - "query metric value after positive increment"); - ASSERT_EQ(value, prev_value + increment); - - // Increment negatively - double decrement = -3.14; - prev_value = value; - auto err = TRITONSERVER_MetricIncrement(metric, decrement); - switch (kind) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - ASSERT_NE(err, nullptr); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - ASSERT_EQ(err, nullptr); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric, &value), - "query metric value after negative increment"); - ASSERT_EQ(value, prev_value + decrement); - break; - } - default: - ASSERT_TRUE(false); - break; - } - - // Set - double set_value = 42.0; - err = TRITONSERVER_MetricSet(metric, set_value); - switch (kind) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - ASSERT_NE(err, nullptr); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - ASSERT_EQ(err, nullptr); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricValue(metric, &value), - "query metric value after set"); - ASSERT_EQ(value, set_value); - break; - } - default: - ASSERT_TRUE(false); - break; - } - - // MetricKind - TRITONSERVER_MetricKind kind_tmp; - FAIL_TEST_IF_ERR( - TRITONSERVER_GetMetricKind(metric, &kind_tmp), "query metric kind"); - ASSERT_EQ(kind_tmp, kind); - TRITONSERVER_ErrorDelete(err); -} - - -// Test Fixture -class MetricsApiTest : public ::testing::Test { - protected: - // Run only once before entire set of tests - static void SetUpTestSuite() {} - // Run only once after entire set of tests - static void TearDownTestSuite() {} - - // Run before each test - void SetUp() override - { - // Create server object to pass when retrieving metrics. - // NOTE: It is currently not required to pass a valid server object to - // TRITONSERVER_ServerMetrics, but is more future-proof to include. - TRITONSERVER_ServerOptions* server_options = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsNew(&server_options), - "creating server options"); - // Mute info output for the sake of this test, less output - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetLogInfo(server_options, false), - "disabling log INFO for brevity"); - // This test doesn't require the use of any models, so we use "." 
as repo - // and set ModelControlMode to EXPLICIT to avoid attempting to load models - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath(server_options, "."), - "setting model repository path"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelControlMode( - server_options, TRITONSERVER_MODEL_CONTROL_EXPLICIT), - "setting model control mode"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerNew(&server_, server_options), "creating server"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - } - - // Run after each test - void TearDown() override - { - FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_), "deleting server"); - } - - static TRITONSERVER_Server* server_; -}; - -TRITONSERVER_Server* MetricsApiTest::server_ = nullptr; - -// Test end-to-end flow of Generic Metrics API for Counter metric -TEST_F(MetricsApiTest, TestCounterEndToEnd) -{ - // Create metric family - TRITONSERVER_MetricFamily* family; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER; - const char* name = "custom_counter_example"; - const char* description = "this is an example counter metric added via API."; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family"); - - // Create metric - TRITONSERVER_Metric* metric; - std::vector labels; - labels.emplace_back(TRITONSERVER_ParameterNew( - "example1", TRITONSERVER_PARAMETER_STRING, "counter_label1")); - labels.emplace_back(TRITONSERVER_ParameterNew( - "example2", TRITONSERVER_PARAMETER_STRING, "counter_label2")); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric, family, labels.data(), labels.size()), - "Creating new metric"); - for (const auto label : labels) { - TRITONSERVER_ParameterDelete(const_cast(label)); - } - - // Run through metric APIs and assert correctness - MetricAPIHelper(metric, kind); - - // Assert custom metric is reported and found in output - ASSERT_EQ(NumMetricMatches(server_, description), 1); - - // Cleanup - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric), "delete metric"); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyDelete(family), "delete metric family"); - - // Assert custom metric/family is unregistered and no longer in output - ASSERT_EQ(NumMetricMatches(server_, description), 0); -} - -// Test end-to-end flow of Generic Metrics API for Gauge metric -TEST_F(MetricsApiTest, TestGaugeEndToEnd) -{ - // Create metric family - TRITONSERVER_MetricFamily* family; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_GAUGE; - const char* name = "custom_gauge_example"; - const char* description = "this is an example gauge metric added via API."; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family"); - - // Create metric - TRITONSERVER_Metric* metric; - std::vector labels; - labels.emplace_back(TRITONSERVER_ParameterNew( - "example1", TRITONSERVER_PARAMETER_STRING, "gauge_label1")); - labels.emplace_back(TRITONSERVER_ParameterNew( - "example2", TRITONSERVER_PARAMETER_STRING, "gauge_label2")); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric, family, labels.data(), labels.size()), - "Creating new metric"); - for (const auto label : labels) { - TRITONSERVER_ParameterDelete(const_cast(label)); - } - - // Run through metric APIs and assert correctness - MetricAPIHelper(metric, kind); - - // Assert custom metric is reported and found in output - ASSERT_EQ(NumMetricMatches(server_, description), 1); - - // Cleanup - 
FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric), "delete metric"); - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyDelete(family), "delete metric family"); - - // Assert custom metric/family is unregistered and no longer in output - ASSERT_EQ(NumMetricMatches(server_, description), 0); -} - -// Test that a duplicate metric family can't be added -// with a conflicting type/kind -TEST_F(MetricsApiTest, TestDupeMetricFamilyDiffKind) -{ - // Create metric family - TRITONSERVER_MetricFamily* family1 = nullptr; - TRITONSERVER_MetricKind kind1 = TRITONSERVER_METRIC_KIND_COUNTER; - const char* name = "diff_kind_test"; - const char* description = "diff kind description"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family1, kind1, name, description), - "Creating new metric family1"); - - // Create duplicate metric family with different kind - TRITONSERVER_MetricFamily* family2 = nullptr; - TRITONSERVER_MetricKind kind2 = TRITONSERVER_METRIC_KIND_GAUGE; - // Expect this to fail, can't have duplicate name of different kind - auto err = TRITONSERVER_MetricFamilyNew(&family2, kind2, name, description); - ASSERT_NE(err, nullptr); - ASSERT_EQ(family2, nullptr); - TRITONSERVER_ErrorDelete(err); -} - -// Test that a duplicate metric family name will still -// return the original metric family even if the description -// is changed -TEST_F(MetricsApiTest, TestDupeMetricFamilyDiffDescription) -{ - // Create metric family - TRITONSERVER_MetricFamily* family1 = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER; - const char* name = "diff_description_test"; - const char* description1 = "first description"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family1, kind, name, description1), - "Creating new metric family1"); - - // Create duplicate metric family - TRITONSERVER_MetricFamily* family2 = nullptr; - const char* description2 = "second description"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family2, kind, name, description2), - "Creating new metric family2"); - - // Assert MetricFamily is not reported until metrics are added to them - ASSERT_EQ(NumMetricMatches(server_, description1), 0); - ASSERT_EQ(NumMetricMatches(server_, description2), 0); - - // Add metric to family2 only, this will be shared by family1 as well - // since both families refer to the same underlying prometheus family - std::vector labels; - TRITONSERVER_Metric* metric2 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric2, family2, labels.data(), labels.size()), - "Creating new metric2"); - - // Assert MetricFamily is reported exactly once - // This confirms attempting to add a duplicate returns the existing family - ASSERT_EQ(NumMetricMatches(server_, description1), 1); - // The first description will be taken/kept if adding a duplicate - /// metric family name, even with a different description - ASSERT_EQ(NumMetricMatches(server_, description2), 0); - - // Delete one of the metric family references - // Specificailly, family1, because family2 is bound to metric2 - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family1), "delete family1"); - - // Assert custom metric/family remains when family2 still references it - ASSERT_EQ(NumMetricMatches(server_, description1), 1); - - // Assert custom metric/family unregistered after last reference deleted - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric2), "delete metric2"); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family2), "delete family2"); - ASSERT_EQ(NumMetricMatches(server_, description1), 0); - 
ASSERT_EQ(NumMetricMatches(server_, description2), 0); -} - -// Test that adding a duplicate metric family will reuse the original -// and not add another entry to registry -TEST_F(MetricsApiTest, TestDupeMetricFamily) -{ - // Create metric family - TRITONSERVER_MetricFamily* family1 = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER; - const char* name = "dupe_metric_family_test"; - const char* description = "dupe metric family description"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family1, kind, name, description), - "Creating new metric family1"); - - // Create duplicate metric family - TRITONSERVER_MetricFamily* family2 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family2, kind, name, description), - "Creating new metric family2"); - - // Assert MetricFamily is not reported until metrics are added to them - ASSERT_EQ(NumMetricMatches(server_, description), 0); - - // Create unique metrics for each family object. Both family objects - // will refer to the same prometheus family in the registry, so both - // metrics should be displayed under the family. - const char* metric_key = "custom_metric_key"; - std::vector labels1; - labels1.emplace_back(TRITONSERVER_ParameterNew( - metric_key, TRITONSERVER_PARAMETER_STRING, "label1")); - TRITONSERVER_Metric* metric1 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric1, family1, labels1.data(), labels1.size()), - "Creating new metric1"); - for (const auto label : labels1) { - TRITONSERVER_ParameterDelete(const_cast(label)); - } - - std::vector labels2; - labels2.emplace_back(TRITONSERVER_ParameterNew( - metric_key, TRITONSERVER_PARAMETER_STRING, "label2")); - TRITONSERVER_Metric* metric2 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric2, family2, labels2.data(), labels2.size()), - "Creating new metric2"); - for (const auto label : labels2) { - TRITONSERVER_ParameterDelete(const_cast(label)); - } - - // Assert MetricFamily is reported exactly once - // This confirms attempting to add a duplicate returns the existing family - ASSERT_EQ(NumMetricMatches(server_, description), 1); - // Assert we have two unique metrics - ASSERT_EQ(NumMetricMatches(server_, metric_key), 2); - - // Delete one of the metric family references - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric1), "delete metric1"); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family1), "delete family1"); - - // Assert custom family remains when there's still a reference to it - ASSERT_EQ(NumMetricMatches(server_, description), 1); - // Assert only one remaining metric after deleting one - ASSERT_EQ(NumMetricMatches(server_, metric_key), 1); - - // Assert custom metric/family unregistered after last reference deleted - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric2), "delete metric2"); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family2), "delete family2"); - ASSERT_EQ(NumMetricMatches(server_, description), 0); - // Assert no remaining metrics after deleting both - ASSERT_EQ(NumMetricMatches(server_, metric_key), 0); -} - -// Test that adding a duplicate metric will refer to the same -// underlying metric, and all instances will be updated -TEST_F(MetricsApiTest, TestDupeMetricLabels) -{ - std::vector labels; - labels.emplace_back(TRITONSERVER_ParameterNew( - "example1", TRITONSERVER_PARAMETER_STRING, "label1")); - labels.emplace_back(TRITONSERVER_ParameterNew( - "example2", TRITONSERVER_PARAMETER_STRING, "label2")); - - DupeMetricHelper(server_, labels); - - for (const auto label : labels) 
{ - TRITONSERVER_ParameterDelete(const_cast(label)); - } -} - -// Test that adding a duplicate metric will refer to the same -// underlying metric, and all instances will be updated -TEST_F(MetricsApiTest, TestDupeMetricEmptyLabels) -{ - std::vector labels; - DupeMetricHelper(server_, labels); -} - -TEST_F(MetricsApiTest, TestOutOfOrderDelete) -{ - // Create metric family - TRITONSERVER_MetricFamily* family = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER; - const char* name = "out_of_order_delete"; - const char* description = "out of order delete test"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family"); - - // Add metric to family - std::vector labels; - TRITONSERVER_Metric* metric = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric, family, labels.data(), labels.size()), - "Creating new metric"); - - // Check that deleting metric family BEFORE metric fails - auto err = TRITONSERVER_MetricFamilyDelete(family); - EXPECT_THAT( - TRITONSERVER_ErrorMessage(err), HasSubstr("Must call MetricDelete")); - - // Check that deleting in correct order still works after above failure - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric), "deleting metric"); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family), "deleting family"); - TRITONSERVER_ErrorDelete(err); -} - -TEST_F(MetricsApiTest, TestMetricAfterFamilyDelete) -{ - // Create metric family - TRITONSERVER_MetricFamily* family = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_GAUGE; - const char* name = "use_metric_after_family_delete"; - const char* description = "test using a metric after its family is deleted"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family"); - - // Add metric to family - std::vector labels; - TRITONSERVER_Metric* metric = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric, family, labels.data(), labels.size()), - "Creating new metric"); - - // Check that deleting metric family BEFORE metric fails - auto err = TRITONSERVER_MetricFamilyDelete(family); - EXPECT_THAT( - TRITONSERVER_ErrorMessage(err), HasSubstr("Must call MetricDelete")); - - // Use internal implementation to force deletion since C API checks first - // NOTE: This is for internal testing and should NOT be done by users. - delete reinterpret_cast(family); - - // Expected API calls to fail since metric has been invalidated by - // calling MetricFamilyDelete before MetricDelete - double value = -1; - err = TRITONSERVER_MetricValue(metric, &value); - EXPECT_THAT(TRITONSERVER_ErrorMessage(err), HasSubstr("invalidated")); - err = TRITONSERVER_MetricIncrement(metric, 1.0); - EXPECT_THAT(TRITONSERVER_ErrorMessage(err), HasSubstr("invalidated")); - err = TRITONSERVER_MetricSet(metric, 1.0); - EXPECT_THAT(TRITONSERVER_ErrorMessage(err), HasSubstr("invalidated")); - TRITONSERVER_ErrorDelete(err); -} - -// This test serves as a reminder to consider the ability to access -// internal core metrics via current metrics API and its implications. -TEST_F(MetricsApiTest, TestCoreMetricAccess) -{ - // Test accessing a metric family created in Triton Core - // through prometheus directly. Technically this metric can be - // updated manually by a user in addition to how the core manages - // the metric, but this should generally not be done. 
- TRITONSERVER_MetricFamily* family = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_GAUGE; - // Pick existing core metric name here. - const char* name = "nv_gpu_power_limit"; - const char* description = ""; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family"); - // DLIS-4072: If registry->Remove() is implemented in MetricFamily we will - // we will probably want to make sure core metrics can not be deleted early. - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family), "delete family"); -} - -TEST_F(MetricsApiTest, TestChildMetricTracking) -{ - // Create metric family - TRITONSERVER_MetricFamily* family = nullptr; - TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_GAUGE; - const char* name = "test_ref_counting"; - const char* description = "test using metric ref counting"; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricFamilyNew(&family, kind, name, description), - "Creating new metric family"); - - // Use internal implementation to verify correctness - auto tc_family = reinterpret_cast(family); - - // Create metric - TRITONSERVER_Metric* metric1 = nullptr; - std::vector labels; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric1, family, labels.data(), labels.size()), - "Creating new metric1"); - ASSERT_EQ(tc_family->NumMetrics(), 1); - - // Create duplicate metric - TRITONSERVER_Metric* metric2 = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_MetricNew(&metric2, family, labels.data(), labels.size()), - "Creating new metric2"); - ASSERT_EQ(tc_family->NumMetrics(), 2); - - - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric1), "delete metric1"); - ASSERT_EQ(tc_family->NumMetrics(), 1); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricDelete(metric2), "delete metric2"); - ASSERT_EQ(tc_family->NumMetrics(), 0); - FAIL_TEST_IF_ERR(TRITONSERVER_MetricFamilyDelete(family), "delete family"); -} - -} // namespace - -int -main(int argc, char** argv) -{ -#ifdef TRITON_ENABLE_LOGGING - LOG_SET_VERBOSE(1); -#endif // TRITON_ENABLE_LOGGING - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} - -#endif // TRITON_ENABLE_METRICS diff --git a/3rdparty/core-r22.12/src/test/pinned_memory_manager_test.cc b/3rdparty/core-r22.12/src/test/pinned_memory_manager_test.cc deleted file mode 100644 index d618047853192df9947ef5bbf7837dec07ebcf89..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/pinned_memory_manager_test.cc +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "gtest/gtest.h" - -#include -#include -#include -#include -#include -#include "pinned_memory_manager.h" -#include "tritonserver_apis.h" - -namespace tc = triton::core; - -namespace { - -#define CHECK_POINTER_ATTRIBUTES(ptr__, type__, device__) \ - do { \ - cudaPointerAttributes attr; \ - auto cuerr = cudaPointerGetAttributes(&attr, ptr__); \ - ASSERT_TRUE(cuerr == cudaSuccess) \ - << "Failed to get CUDA pointer attributes: " \ - << cudaGetErrorString(cuerr); \ - EXPECT_TRUE(attr.type == type__) \ - << "Expect pointer with type " << type__ << ", got: " << attr.type; \ - if (attr.type == cudaMemoryTypeDevice) { \ - EXPECT_TRUE(attr.device == device__) \ - << "Expect allocation on CUDA device " << device__ \ - << ", got: " << attr.device; \ - } \ - } while (false) - -#define STORE_RESULT_AND_RETURN_IF_ERROR(metadata__, idx__, status__) \ - do { \ - if (!status__.IsOk()) { \ - std::lock_guard lk(metadata__->mtx_); \ - metadata__->results_[idx__] = status__.AsString(); \ - return; \ - } \ - } while (false) - -struct MemoryWorkMetadata { - MemoryWorkMetadata(size_t thread_count) - : thread_count_(thread_count), ready_count_(0), results_(thread_count, "") - { - } - size_t thread_count_; - size_t ready_count_; - std::vector results_; - std::mutex mtx_; - std::condition_variable cv_; -}; - -void -RunMemoryWork( - size_t idx, size_t alloc_size, bool allow_nonpinned_fallback, - MemoryWorkMetadata* metadata) -{ - // Prepare variable to hold input / output - std::unique_ptr input(new char[alloc_size]); - std::unique_ptr output(new char[alloc_size]); - - // Wait until all threads are issued - { - std::unique_lock lk(metadata->mtx_); - metadata->ready_count_++; - if (metadata->ready_count_ != metadata->thread_count_) { - while (metadata->ready_count_ != metadata->thread_count_) { - metadata->cv_.wait(lk); - } - } - metadata->cv_.notify_one(); - } - - // Simulate receive input data -> alloc and write to input buffer - // -> alloc and write to output buffer -> return output data - TRITONSERVER_MemoryType allocated_type = TRITONSERVER_MEMORY_GPU; - void* input_buffer = nullptr; - STORE_RESULT_AND_RETURN_IF_ERROR( - metadata, idx, - tc::PinnedMemoryManager::Alloc( - &input_buffer, alloc_size, &allocated_type, - allow_nonpinned_fallback)); - if ((!allow_nonpinned_fallback) && - (allocated_type != TRITONSERVER_MEMORY_CPU_PINNED)) { - tc::Status status( - tc::Status::Code::INVALID_ARG, "returned memory buffer is not pinned"); - STORE_RESULT_AND_RETURN_IF_ERROR(metadata, idx, status); - } - memcpy(input_buffer, input.get(), alloc_size); - void* output_buffer = nullptr; - STORE_RESULT_AND_RETURN_IF_ERROR( - metadata, idx, - tc::PinnedMemoryManager::Alloc( - &output_buffer, alloc_size, &allocated_type, - allow_nonpinned_fallback)); - if ((!allow_nonpinned_fallback) && - (allocated_type != TRITONSERVER_MEMORY_CPU_PINNED)) { - tc::Status status( - tc::Status::Code::INVALID_ARG, "returned memory buffer is not pinned"); - STORE_RESULT_AND_RETURN_IF_ERROR(metadata, idx, 
status); - } - memcpy(output_buffer, input_buffer, alloc_size); - memcpy(output.get(), output_buffer, alloc_size); - for (size_t offset = 0; offset < alloc_size; offset++) { - if (input.get()[offset] != output.get()[offset]) { - std::lock_guard lk(metadata->mtx_); - metadata->results_[idx] = - std::string("mismatch between input and output for work idx ") + - std::to_string(idx); - return; - } - } -} - -// Wrapper of PinnedMemoryManager class to expose Reset() for unit testing -class TestingPinnedMemoryManager : public tc::PinnedMemoryManager { - public: - static void Reset() { PinnedMemoryManager::Reset(); } -}; - -class PinnedMemoryManagerTest : public ::testing::Test { - protected: - void SetUp() override - { - // Default memory manager options - options_.pinned_memory_pool_byte_size_ = 1 << 10; - } - - void TearDown() override { TestingPinnedMemoryManager::Reset(); } - - tc::PinnedMemoryManager::Options options_; -}; - -TEST_F(PinnedMemoryManagerTest, InitOOM) -{ - // Set to reserve too much memory - options_.pinned_memory_pool_byte_size_ = uint64_t(1) << 40 /* 1024 GB */; - auto status = tc::PinnedMemoryManager::Create(options_); - // For pinned memory manager, it will still be created for "CPU fallback" - // allocation even if it fails to create pinned memory pool - EXPECT_TRUE(status.IsOk()) << status.Message(); -} - -TEST_F(PinnedMemoryManagerTest, InitSuccess) -{ - auto status = tc::PinnedMemoryManager::Create(options_); - EXPECT_TRUE(status.IsOk()) << status.Message(); -} - -TEST_F(PinnedMemoryManagerTest, InitZeroByte) -{ - options_.pinned_memory_pool_byte_size_ = 0; - auto status = tc::PinnedMemoryManager::Create(options_); - EXPECT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - TRITONSERVER_MemoryType allocated_type = TRITONSERVER_MEMORY_GPU; - status = tc::PinnedMemoryManager::Alloc( - &ptr, 1, &allocated_type, false /* allow_nonpinned_fallback */); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; -} - -TEST_F(PinnedMemoryManagerTest, AllocSuccess) -{ - auto status = tc::PinnedMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - TRITONSERVER_MemoryType allocated_type = TRITONSERVER_MEMORY_GPU; - status = tc::PinnedMemoryManager::Alloc( - &ptr, 512, &allocated_type, false /* allow_nonpinned_fallback */); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(ptr) << "Expect pointer to allocated buffer"; - ASSERT_TRUE(allocated_type == TRITONSERVER_MEMORY_CPU_PINNED) - << "Expect pointer to pinned memory"; - // check if returned pointer is pinned memory pointer - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeHost, 0); -} - -TEST_F(PinnedMemoryManagerTest, AllocFallbackSuccess) -{ - auto status = tc::PinnedMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - void* ptr = nullptr; - TRITONSERVER_MemoryType allocated_type = TRITONSERVER_MEMORY_GPU; - status = tc::PinnedMemoryManager::Alloc( - &ptr, 2048, &allocated_type, true /* allow_nonpinned_fallback */); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(ptr) << "Expect pointer to allocated buffer"; - ASSERT_TRUE(allocated_type == TRITONSERVER_MEMORY_CPU) - << "Expect pointer to non-pinned memory"; - // check if returned pointer is non-pinned memory pointer - CHECK_POINTER_ATTRIBUTES(ptr, cudaMemoryTypeUnregistered, 0); -} - -TEST_F(PinnedMemoryManagerTest, AllocFail) -{ - auto status = tc::PinnedMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); 
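  // The fixture's default pool is only 1 KB (1 << 10), so the 2048-byte request below with fallback disabled is expected to fail.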
- - void* ptr = nullptr; - TRITONSERVER_MemoryType allocated_type = TRITONSERVER_MEMORY_GPU; - status = tc::PinnedMemoryManager::Alloc( - &ptr, 2048, &allocated_type, false /* allow_nonpinned_fallback */); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; -} - -TEST_F(PinnedMemoryManagerTest, MultipleAlloc) -{ - auto status = tc::PinnedMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - void* first_ptr = nullptr; - TRITONSERVER_MemoryType allocated_type = TRITONSERVER_MEMORY_GPU; - status = tc::PinnedMemoryManager::Alloc( - &first_ptr, 600, &allocated_type, false /* allow_nonpinned_fallback */); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(first_ptr) << "Expect pointer to allocated buffer"; - ASSERT_TRUE(allocated_type == TRITONSERVER_MEMORY_CPU_PINNED) - << "Expect pointer to pinned memory"; - // check if returned pointer is pinned memory pointer - CHECK_POINTER_ATTRIBUTES(first_ptr, cudaMemoryTypeHost, 0); - - // 512 + 600 > 1024 - void* second_ptr = nullptr; - status = tc::PinnedMemoryManager::Alloc( - &second_ptr, 512, &allocated_type, false /* allow_nonpinned_fallback */); - ASSERT_FALSE(status.IsOk()) << "Unexpected successful allocation"; - - // Free the first pointer and retry the second one - status = tc::PinnedMemoryManager::Free(first_ptr); - EXPECT_TRUE(status.IsOk()) << status.Message(); - status = tc::PinnedMemoryManager::Alloc( - &second_ptr, 512, &allocated_type, false /* allow_nonpinned_fallback */); - ASSERT_TRUE(status.IsOk()) << status.Message(); - ASSERT_TRUE(second_ptr) << "Expect pointer to allocated buffer"; - ASSERT_TRUE(allocated_type == TRITONSERVER_MEMORY_CPU_PINNED) - << "Expect pointer to pinned memory"; - // check if returned pointer is pinned memory pointer - CHECK_POINTER_ATTRIBUTES(second_ptr, cudaMemoryTypeHost, 0); -} - -TEST_F(PinnedMemoryManagerTest, ParallelAlloc) -{ - options_.pinned_memory_pool_byte_size_ = uint64_t(1) << 28 /* 256 MB */; - auto status = tc::PinnedMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - // Create threads to perform operations on allocated memory in parallel - // Seems like for 1 MB alloc size (2 MB for both input and output), - // 100 threads is a good amount for pool manager not to use CPU fallback. 
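  // (100 threads x 2 MB each for input plus output = 200 MB, which fits within the 256 MB pool configured above.)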
- size_t thread_count = 100; - size_t allocated_size = 1 << 20 /* 1 MB */; - MemoryWorkMetadata metadata(thread_count); - std::vector threads; - for (size_t idx = 0; idx < thread_count; idx++) { - threads.emplace_back( - std::thread(RunMemoryWork, idx, allocated_size, false, &metadata)); - } - for (size_t idx = 0; idx < thread_count; idx++) { - threads[idx].join(); - EXPECT_TRUE(metadata.results_[idx].empty()) << metadata.results_[idx]; - } -} - - -TEST_F(PinnedMemoryManagerTest, ParallelAllocFallback) -{ - options_.pinned_memory_pool_byte_size_ = uint64_t(1) << 28 /* 256 MB */; - auto status = tc::PinnedMemoryManager::Create(options_); - ASSERT_TRUE(status.IsOk()) << status.Message(); - - // Create threads to perform operations on allocated memory in parallel - size_t thread_count = 128; - size_t allocated_size = 1 << 24 /* 4 MB */; - MemoryWorkMetadata metadata(thread_count); - std::vector threads; - for (size_t idx = 0; idx < thread_count; idx++) { - threads.emplace_back( - std::thread(RunMemoryWork, idx, allocated_size, true, &metadata)); - } - for (size_t idx = 0; idx < thread_count; idx++) { - threads[idx].join(); - EXPECT_TRUE(metadata.results_[idx].empty()) << metadata.results_[idx]; - } -} - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/test/query_test.cc b/3rdparty/core-r22.12/src/test/query_test.cc deleted file mode 100644 index a3e78176359704af1accaf44f8443386c933c4cc..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/query_test.cc +++ /dev/null @@ -1,368 +0,0 @@ -// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
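The deleted query_test.cc below exercises the optional query callback that an in-process client can attach to a response allocator, letting the server ask for preferred output placement before it allocates anything. The sketch here shows that wiring in isolation; it uses only TRITONSERVER C API calls that appear in the test itself, the callback and helper names are illustrative, and error handling is compressed.

```cpp
// Sketch only (not the deleted test): minimal allocator callbacks plus the
// optional query function that query_test.cc exercises.
#include <cstdlib>

#include "triton/core/tritonserver.h"

static TRITONSERVER_Error*
ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type,
    int64_t preferred_memory_type_id, void* userp, void** buffer,
    void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type,
    int64_t* actual_memory_type_id)
{
  // Always hand back plain CPU memory, regardless of the preferred type.
  *buffer = (byte_size == 0) ? nullptr : std::malloc(byte_size);
  *buffer_userp = nullptr;
  *actual_memory_type = TRITONSERVER_MEMORY_CPU;
  *actual_memory_type_id = 0;
  return nullptr;  // success
}

static TRITONSERVER_Error*
ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id)
{
  std::free(buffer);
  return nullptr;  // success
}

static TRITONSERVER_Error*
QueryFn(
    TRITONSERVER_ResponseAllocator* allocator, void* userp,
    const char* tensor_name, size_t* byte_size,
    TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id)
{
  // Report that outputs will be placed in CPU memory on device 0.
  *memory_type = TRITONSERVER_MEMORY_CPU;
  *memory_type_id = 0;
  return nullptr;  // success
}

// Wiring: create the allocator, then register the query function so the
// server can resolve output properties before running the model.
static TRITONSERVER_Error*
MakeAllocator(TRITONSERVER_ResponseAllocator** allocator)
{
  TRITONSERVER_Error* err = TRITONSERVER_ResponseAllocatorNew(
      allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */);
  if (err == nullptr) {
    err = TRITONSERVER_ResponseAllocatorSetQueryFunction(*allocator, QueryFn);
  }
  return err;
}
```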
-#include "gmock/gmock.h" -#include "gtest/gtest.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "triton/core/tritonserver.h" - -namespace { - -using ::testing::HasSubstr; - -#define FAIL_TEST_IF_ERR(X, MSG) \ - do { \ - std::shared_ptr err__((X), TRITONSERVER_ErrorDelete); \ - ASSERT_TRUE((err__ == nullptr)) \ - << "error: " << (MSG) << ": " \ - << TRITONSERVER_ErrorCodeString(err__.get()) << " - " \ - << TRITONSERVER_ErrorMessage(err__.get()); \ - } while (false) - -using NameMap = - std::map>; -struct QueryTracker { - QueryTracker( - const char* tensor_name, size_t* byte_size, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id) - : has_name_(tensor_name != nullptr), has_byte_size_(byte_size != nullptr), - caller_preferred_type_(memory_type), - caller_preferred_id_(memory_type_id) - { - if (has_name_) { - name_ = tensor_name; - } - if (has_byte_size_) { - byte_size_ = *byte_size; - } - } - bool has_name_; - bool has_byte_size_; - std::string name_; - size_t byte_size_; - TRITONSERVER_MemoryType caller_preferred_type_; - int64_t caller_preferred_id_; -}; - -TRITONSERVER_Error* -ResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id) -{ - auto& output_tracker = - (reinterpret_cast, NameMap>*>(userp) - ->second); - output_tracker.emplace( - tensor_name, - std::make_tuple( - preferred_memory_type, preferred_memory_type_id, byte_size)); - return nullptr; // Success -} - -TRITONSERVER_Error* -ResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - return nullptr; // Success -} - -void -InferRequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) -{ - TRITONSERVER_InferenceRequestDelete(request); -} - -void -InferResponseComplete( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) -{ - if (response != nullptr) { - // Notify that the completion. - std::promise* p = - reinterpret_cast*>(userp); - p->set_value(TRITONSERVER_InferenceResponseError(response)); - } - TRITONSERVER_InferenceResponseDelete(response); -} - -class QueryTest : public ::testing::Test { - protected: - static void SetUpTestSuite() - { - // Create the server... 
- TRITONSERVER_ServerOptions* server_options = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsNew(&server_options), - "creating server options"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath( - server_options, "./models"), - "setting model repository path"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetBackendDirectory( - server_options, "/opt/tritonserver/backends"), - "setting backend directory"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - server_options, "/opt/tritonserver/repoagents"), - "setting repository agent directory"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), - "setting strict model configuration"); - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerNew(&server_, server_options), "creating server"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - } - - static void TearDownTestSuite() - { - FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_), "deleting server"); - } - - void SetUp() override - { - ASSERT_TRUE(server_ != nullptr) << "Server has not created"; - // Wait until the server is both live and ready. - size_t health_iters = 0; - while (true) { - bool live, ready; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerIsLive(server_, &live), - "unable to get server liveness"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerIsReady(server_, &ready), - "unable to get server readiness"); - if (live && ready) { - break; - } - - if (++health_iters >= 10) { - FAIL() << "failed to find healthy inference server"; - } - - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - } - - // Create allocator with common callback - FAIL_TEST_IF_ERR( - TRITONSERVER_ResponseAllocatorNew( - &allocator_, ResponseAlloc, ResponseRelease, - nullptr /* start_fn */), - "creating response allocator"); - - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestNew( - &irequest_, server_, "query", -1 /* model_version */), - "creating inference request"); - - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestSetReleaseCallback( - irequest_, InferRequestComplete, - nullptr /* request_release_userp */), - "setting request release callback"); - - std::vector shape{1}; - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestAddInput( - irequest_, "INPUT", TRITONSERVER_TYPE_UINT8, shape.data(), - shape.size()), - "setting input for the request"); - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest_, "INPUT", input_data_.data(), input_data_.size(), - TRITONSERVER_MEMORY_CPU, 0), - "assigning INPUT data"); - - FAIL_TEST_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest_, allocator_, reinterpret_cast(&output_info_), - InferResponseComplete, reinterpret_cast(&completed_)), - "setting response callback"); - } - - void TearDown() override - { - unsetenv("TEST_ANONYMOUS"); - unsetenv("TEST_BYTE_SIZE"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ResponseAllocatorDelete(allocator_), - "deleting response allocator"); - } - - static TRITONSERVER_Server* server_; - TRITONSERVER_ResponseAllocator* allocator_ = nullptr; - static std::vector input_data_; - TRITONSERVER_InferenceRequest* irequest_ = nullptr; - std::promise completed_; - std::pair, NameMap> output_info_; -}; - -TRITONSERVER_Server* QueryTest::server_ = nullptr; -std::vector QueryTest::input_data_{1}; - -TEST_F(QueryTest, DefaultQuery) -{ - TRITONSERVER_ResponseAllocatorQueryFn_t query_fn = - [](TRITONSERVER_ResponseAllocator* allocator, void* userp, - const char* 
tensor_name, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) -> TRITONSERVER_Error* { - auto& query_tracker = - (reinterpret_cast, NameMap>*>(userp) - ->first); - query_tracker.emplace_back( - tensor_name, byte_size, *memory_type, *memory_type_id); - *memory_type = TRITONSERVER_MEMORY_CPU; - *memory_type_id = 0; - return nullptr; - }; - FAIL_TEST_IF_ERR( - TRITONSERVER_ResponseAllocatorSetQueryFunction(allocator_, query_fn), - "setting response callback"); - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr /* trace */), - "running inference"); - - auto err = completed_.get_future().get(); - ASSERT_TRUE(err == nullptr) << "Expect successful inference"; - - // Check query tracker to see if the query function is connected properly - ASSERT_EQ(output_info_.first.size(), size_t(2)); - for (size_t i = 0; i < output_info_.first.size(); ++i) { - const auto& query_info = output_info_.first[i]; - EXPECT_EQ(query_info.has_name_, true); - EXPECT_EQ(query_info.name_, (std::string("OUTPUT") + std::to_string(i))); - EXPECT_EQ(query_info.has_byte_size_, false); - EXPECT_EQ( - query_info.caller_preferred_type_, TRITONSERVER_MEMORY_CPU_PINNED); - EXPECT_EQ(query_info.caller_preferred_id_, 1); - } - - const auto& output_0 = output_info_.second["OUTPUT0"]; - EXPECT_EQ(std::get<0>(output_0), TRITONSERVER_MEMORY_CPU); - EXPECT_EQ(std::get<1>(output_0), int64_t(0)); - EXPECT_EQ(std::get<2>(output_0), size_t(2)); - - const auto& output_1 = output_info_.second["OUTPUT1"]; - EXPECT_EQ(std::get<0>(output_1), TRITONSERVER_MEMORY_CPU); - EXPECT_EQ(std::get<1>(output_1), int64_t(0)); - EXPECT_EQ(std::get<2>(output_1), size_t(2)); -} - -TEST_F(QueryTest, NoQueryFn) -{ - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr /* trace */), - "running inference"); - - auto err = completed_.get_future().get(); - ASSERT_TRUE(err != nullptr) << "Expect error"; - EXPECT_EQ(TRITONSERVER_ErrorCode(err), TRITONSERVER_ERROR_UNAVAILABLE); - EXPECT_THAT( - TRITONSERVER_ErrorMessage(err), - HasSubstr("Output properties are not available")); -} - -TEST_F(QueryTest, UnnamedQuery) -{ - setenv("TEST_ANONYMOUS", "", 1); - setenv("TEST_BYTE_SIZE", "32", 1); - TRITONSERVER_ResponseAllocatorQueryFn_t query_fn = - [](TRITONSERVER_ResponseAllocator* allocator, void* userp, - const char* tensor_name, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, - int64_t* memory_type_id) -> TRITONSERVER_Error* { - auto& query_tracker = - (reinterpret_cast, NameMap>*>(userp) - ->first); - query_tracker.emplace_back( - tensor_name, byte_size, *memory_type, *memory_type_id); - // Slightly different setting - *memory_type = TRITONSERVER_MEMORY_GPU; - *memory_type_id = 2; - return nullptr; - }; - FAIL_TEST_IF_ERR( - TRITONSERVER_ResponseAllocatorSetQueryFunction(allocator_, query_fn), - "setting response callback"); - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerInferAsync(server_, irequest_, nullptr /* trace */), - "running inference"); - - auto err = completed_.get_future().get(); - ASSERT_TRUE(err == nullptr) << "Expect successful inference"; - - // Check query tracker to see if the query function is connected properly - ASSERT_EQ(output_info_.first.size(), size_t(1)); - for (size_t i = 0; i < output_info_.first.size(); ++i) { - const auto& query_info = output_info_.first[i]; - EXPECT_EQ(query_info.has_name_, false); - EXPECT_EQ(query_info.has_byte_size_, true); - EXPECT_EQ(query_info.byte_size_, size_t(32)); - EXPECT_EQ( - query_info.caller_preferred_type_, 
TRITONSERVER_MEMORY_CPU_PINNED); - EXPECT_EQ(query_info.caller_preferred_id_, 1); - } - - const auto& output_0 = output_info_.second["OUTPUT0"]; - EXPECT_EQ(std::get<0>(output_0), TRITONSERVER_MEMORY_GPU); - EXPECT_EQ(std::get<1>(output_0), int64_t(2)); - EXPECT_EQ(std::get<2>(output_0), size_t(16)); - - const auto& output_1 = output_info_.second["OUTPUT1"]; - EXPECT_EQ(std::get<0>(output_1), TRITONSERVER_MEMORY_GPU); - EXPECT_EQ(std::get<1>(output_1), int64_t(2)); - EXPECT_EQ(std::get<2>(output_1), size_t(16)); -} - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/test/register_api_test.cc b/3rdparty/core-r22.12/src/test/register_api_test.cc deleted file mode 100644 index 5a41bbad98dc312ab2f38042299d25433f3f2a05..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/register_api_test.cc +++ /dev/null @@ -1,905 +0,0 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
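The deleted register_api_test.cc below covers dynamic model-repository registration. For orientation, this sketch shows the register-then-load flow the tests rely on: start a server in EXPLICIT model-control mode against an empty repository, register a directory, and load a model from it. The directory names mirror the fixture's layout and are assumptions about the local test setup; error returns and the liveness polling done by the fixture are omitted for brevity.

```cpp
// Sketch only: register a repository at runtime and load a model from it,
// mirroring the directory layout assumed by the deleted tests.
#include <iostream>

#include "triton/core/tritonserver.h"

int
main()
{
  TRITONSERVER_ServerOptions* options = nullptr;
  TRITONSERVER_Server* server = nullptr;

  // EXPLICIT mode: nothing is loaded at startup; models load only on request.
  TRITONSERVER_ServerOptionsNew(&options);
  TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "empty_models");
  TRITONSERVER_ServerOptionsSetModelControlMode(
      options, TRITONSERVER_MODEL_CONTROL_EXPLICIT);
  TRITONSERVER_ServerNew(&server, options);
  TRITONSERVER_ServerOptionsDelete(options);

  // Make "models_0" (which contains "model_0") visible, then load the model.
  TRITONSERVER_ServerRegisterModelRepository(server, "models_0", nullptr, 0);
  TRITONSERVER_ServerLoadModel(server, "model_0");

  bool ready = false;
  TRITONSERVER_ServerModelIsReady(server, "model_0", 1, &ready);
  std::cout << "model_0 v1 ready: " << std::boolalpha << ready << std::endl;

  TRITONSERVER_ServerDelete(server);
  return 0;
}
```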
- -#include -#include -#include "gtest/gtest.h" -#include "triton/core/tritonserver.h" - -namespace { - -#define FAIL_TEST_IF_ERR(X, MSG) \ - do { \ - std::shared_ptr err__((X), TRITONSERVER_ErrorDelete); \ - ASSERT_TRUE((err__ == nullptr)) \ - << "error: " << (MSG) << ": " \ - << TRITONSERVER_ErrorCodeString(err__.get()) << " - " \ - << TRITONSERVER_ErrorMessage(err__.get()); \ - } while (false) - -#define FAIL_TEST_IF_NOT_ERR(X, CODE, ERR_MSG, MSG) \ - do { \ - std::shared_ptr err__((X), TRITONSERVER_ErrorDelete); \ - ASSERT_TRUE((err__ != nullptr)) << "expected error on: " << (MSG); \ - if (err__ != nullptr) { \ - EXPECT_EQ(TRITONSERVER_ErrorCode(err__.get()), (CODE)) << (MSG); \ - EXPECT_STREQ(TRITONSERVER_ErrorMessage(err__.get()), (ERR_MSG)) \ - << (MSG); \ - } \ - } while (false) - -// Test Fixture, this test suit expects the current directory to -// have the following file structure: -// - empty_models (empty directory) -// - models_0 (contain model directory "model_0") -// - models_1 (contain model directories "model_0", "model_1") -// - models_2 (contain model directories "model_0" with config name -// "mapped_name") -class RegisterApiTest : public ::testing::Test { - protected: - void SetUp() override - { - // Create running server object. - TRITONSERVER_ServerOptions* server_options = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsNew(&server_options), - "creating server options"); - // Triton expects at least one model repository is set at start, set to - // an empty repository set ModelControlMode to EXPLICIT to avoid attempting - // to load models. - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath( - server_options, "empty_models"), - "setting model repository path"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelControlMode( - server_options, TRITONSERVER_MODEL_CONTROL_EXPLICIT), - "setting model control mode"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerNew(&server_, server_options), "creating server"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - ASSERT_TRUE(server_ != nullptr) << "server not created"; - bool live = false; - for (int i = 10; ((i > 0) && !live); --i) { - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerIsLive(server_, &live), "Is server live"); - } - ASSERT_TRUE(live) << "server not live"; - } - - void TearDown() override - { - FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_), "deleting server"); - } - - TRITONSERVER_Server* server_ = nullptr; -}; - -TEST_F(RegisterApiTest, Register) -{ - // Request to load "model_0" which should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); - - // Registering a repository "models_0" where contains "model_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - // Request to load "model_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - "loading model 'model_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_0", 1, &ready), - "Is 'model_0' v1 ready"); - ASSERT_TRUE(ready) << "Expect 'model_0' v1 to be ready, model directory is " - "'models_0/model_0'"; -} - -TEST_F(RegisterApiTest, RegisterWithMap) -{ - // Registering a repository "models_0" where contains "model_0", but with - // different name mapping - 
const char* override_name = "name_0"; - std::shared_ptr managed_param( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_name), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_param != nullptr) << "failed to create name mapping pair"; - std::vector name_map{managed_param.get()}; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - - // Request to load "model_0" which should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); - // Request to load "name_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_0"), - "loading model 'name_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_0", 1, &ready), - "Is 'name_0' v1 ready"); - ASSERT_TRUE(ready) << "Expect 'name_0' v1 to be ready, model directory is " - "'models_0/model_0'"; -} - -TEST_F(RegisterApiTest, RegisterTwice) -{ - // Registering a startup repository - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "empty_models", nullptr, 0), - TRITONSERVER_ERROR_ALREADY_EXISTS, - "model repository 'empty_models' has already been registered", - "registering model repository 'empty_models'"); -} - -TEST_F(RegisterApiTest, RegisterTwice2) -{ - // Registering the same repository twice - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - TRITONSERVER_ERROR_ALREADY_EXISTS, - "model repository 'models_0' has already been registered", - "registering model repository 'models_0'"); -} - -TEST_F(RegisterApiTest, RegisterWithMultiMap) -{ - // Registering a repository "models_0" where contains "model_0", - // and "model_0" is mapped to two different names - std::vector override_names{"name_0", "name_1"}; - std::vector> managed_params; - std::vector name_map; - for (const auto& name : override_names) { - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, name.c_str()), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - name_map.emplace_back(managed_params.back().get()); - } - - // Such mapping should be allow as it is mapping to unique names - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - - // Request to load "name_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_0"), - "loading model 'name_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_0", 1, &ready), - "Is 'name_0' v1 ready"); - ASSERT_TRUE(ready) << "Expect 'name_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - // Request to load "name_1" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_1"), - "loading model 'name_1'"); - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_1", 1, &ready), - "Is 'name_1' v1 ready"); - ASSERT_TRUE(ready) << "Expect 'name_1' v1 to be ready, model directory is " - "'models_0/model_0'"; -} - 
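In the mapping tests above and below, a directory-to-name mapping is expressed as a string parameter whose key is the model subdirectory and whose value is the name the model is served under. A hedged sketch of that pattern, assuming a live server in EXPLICIT model-control mode:

```cpp
// Sketch only: register "models_0" with a directory-to-name mapping, as the
// mapping tests do. "server" is assumed to be a live TRITONSERVER_Server.
#include "triton/core/tritonserver.h"

TRITONSERVER_Error*
RegisterWithMapping(TRITONSERVER_Server* server)
{
  // Map the on-disk directory "model_0" to the served name "name_0".
  TRITONSERVER_Parameter* mapping = TRITONSERVER_ParameterNew(
      "model_0", TRITONSERVER_PARAMETER_STRING, "name_0");
  const TRITONSERVER_Parameter* map[] = {mapping};
  TRITONSERVER_Error* err = TRITONSERVER_ServerRegisterModelRepository(
      server, "models_0", map, 1 /* mapping count */);
  TRITONSERVER_ParameterDelete(mapping);
  // After registration, loads must use "name_0"; "model_0" will fail to poll.
  return err;
}
```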
-TEST_F(RegisterApiTest, RegisterWithRepeatedMap) -{ - // Registering a repository "models_1" where contains "model_0" and "model_1", - // map "model_0" to "model_1" which creates confliction, however, - // in EXPLICIT mode, mapping lookup will have higher priority than - // repository polling so the confliction will be resolved by always loading - // the model from mapped directory. - std::vector override_names{"model_1"}; - std::vector> managed_params; - std::vector name_map; - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_names[0].c_str()), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - name_map.emplace_back(managed_params.back().get()); - - // Such mapping should be allow as it is mapping to unique names - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", name_map.data(), name_map.size()), - "registering model repository 'models_1'"); - - // Request to load "model_1" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_1"), - "loading model 'model_1'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_1", 2, &ready), - "Is 'model_1' ready"); - ASSERT_TRUE(ready) << "Expect 'model_1' v2 to be ready, model directory is " - "'models_1/model_0'"; -} - -TEST_F(RegisterApiTest, RegisterWithRepeatedMap2) -{ - // Registering a repository "models_1" where contains "model_0" and "model_1", - // map both directories to the same name which creates confliction. Different - // from 'RegisterWithRepeatedMap', the confliction within the mapping can't be - // resolved and error should be returend - std::vector dir_names{"model_0", "model_1"}; - std::vector> managed_params; - std::vector name_map; - for (const auto& name : dir_names) { - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - name.c_str(), TRITONSERVER_PARAMETER_STRING, "name_0"), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - name_map.emplace_back(managed_params.back().get()); - } - - // Register should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", name_map.data(), name_map.size()), - TRITONSERVER_ERROR_INVALID_ARG, - "failed to register 'models_1', there is a conflicting mapping for " - "'name_0'", - "registering model repository 'models_1'"); -} - -TEST_F(RegisterApiTest, RegisterMulti) -{ - // Registering repository "models_0" and "model_1" without mappings, - // there are duplicate models but it won't be checked until load - std::vector name_map; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", name_map.data(), name_map.size()), - "registering model repository 'models_1'"); - - // Request to load "model_0" which should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); - // Request to load "model_1" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_1"), - "loading model 'model_1'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_1", 3, 
&ready), - "Is 'model_1' ready"); - ASSERT_TRUE(ready) << "Expect 'model_1' v3 to be ready, model directory is " - "'models_1/model_1'"; -} - -TEST_F(RegisterApiTest, RegisterMultiWithMap) -{ - // Registering repository "models_0" and "models_1" without mappings, - // there are duplicate models but we provides a "override" map for "models_0", - // from "model_0" to "model_0" which sets priority to resolve the conflict. - std::vector override_names{"model_0"}; - std::vector> managed_params; - std::vector name_map; - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_names[0].c_str()), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - name_map.emplace_back(managed_params.back().get()); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", nullptr, 0), - "registering model repository 'models_1'"); - - // Request to load "model_0", "model_1" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - "loading model 'model_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_0", 1, &ready), - "Is 'model_0' ready"); - ASSERT_TRUE(ready) << "Expect 'model_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_1"), - "loading model 'model_1'"); - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_1", 3, &ready), - "Is 'model_1' ready"); - ASSERT_TRUE(ready) << "Expect 'model_1' v3 to be ready, model directory is " - "'models_1/model_1'"; -} - -TEST_F(RegisterApiTest, RegisterMultiWithMap2) -{ - // Registering repository "models_0" and "model_1s", - // there are duplicate models but we provides a map for "models_1" - // so they all have different name. 
- std::vector override_names{"model_2"}; - std::vector> managed_params; - std::vector name_map; - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_names[0].c_str()), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - name_map.emplace_back(managed_params.back().get()); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", name_map.data(), name_map.size()), - "registering model repository 'models_1'"); - - // Request to load "model_0", "model_1", "model_2" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - "loading model 'model_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_0", 1, &ready), - "Is 'model_0' ready"); - ASSERT_TRUE(ready) << "Expect 'model_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_1"), - "loading model 'model_1'"); - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_1", 3, &ready), - "Is 'model_1' ready"); - ASSERT_TRUE(ready) << "Expect 'model_1' v3 to be ready, model directory is " - "'models_1/model_1'"; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_2"), - "loading model 'model_2'"); - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_2", 2, &ready), - "Is 'model_2' ready"); - ASSERT_TRUE(ready) << "Expect 'model_2' v2 to be ready, model directory is " - "'models_1/model_0'"; -} - -TEST_F(RegisterApiTest, RegisterMultiWithMap3) -{ - // Registering repository "models_0" and "model_1s", - // there are duplicate models but we provides a map for both - // "models_0" and "models_1" so they all have different name. 
- std::vector override_names{"name_0", "name_1"}; - std::vector> managed_params; - for (const auto& name : override_names) { - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, name.c_str()), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - } - std::vector models_0_map{ - managed_params[0].get()}; - std::vector models_1_map{ - managed_params[1].get()}; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", models_0_map.data(), models_0_map.size()), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", models_1_map.data(), models_1_map.size()), - "registering model repository 'models_1'"); - - // Request to load "model_0", "model_1", "model_2" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_0"), - "loading model 'name_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_0", 1, &ready), - "Is 'name_0' ready"); - ASSERT_TRUE(ready) << "Expect 'name_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_1"), - "loading model 'name_1'"); - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_1", 2, &ready), - "Is 'name_1' ready"); - ASSERT_TRUE(ready) << "Expect 'name_1' v2 to be ready, model directory is " - "'models_1/model_0'"; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_1"), - "loading model 'model_1'"); - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_1", 3, &ready), - "Is 'model_1' ready"); - ASSERT_TRUE(ready) << "Expect 'model_1' v3 to be ready, model directory is " - "'models_1/model_1'"; -} - -TEST_F(RegisterApiTest, RegisterNonExistingRepo) -{ - // Register should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "unknown_repo", nullptr, 0), - TRITONSERVER_ERROR_INVALID_ARG, - "failed to register 'unknown_repo', repository not found", - "registering model repository 'unknown_repo'"); -} - - -TEST_F(RegisterApiTest, UnregisterInvalidRepo) -{ - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "unknown_repo"), - TRITONSERVER_ERROR_INVALID_ARG, - "failed to unregister 'unknown_repo', repository not found", - "unregistering model repository 'unknown_repo'"); -} - -TEST_F(RegisterApiTest, Unregister) -{ - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "empty_models"), - "unregistering model repository 'empty_models'"); -} - -TEST_F(RegisterApiTest, UnregisterTwice) -{ - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "empty_models"), - "unregistering model repository 'empty_models'"); - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "empty_models"), - TRITONSERVER_ERROR_INVALID_ARG, - "failed to unregister 'empty_models', repository not found", - "unregistering model repository 'empty_models'"); -} - -TEST_F(RegisterApiTest, UnregisterWithLoadedModel) -{ - // Registering a repository "models_0" where contains "model_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - // Request to load "model_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - "loading 
model 'model_0'"); - - // Unregister and the model should still be loaded - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "models_0"), - "unregistering model repository 'models_0'"); - - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_0", 1, &ready), - "Is 'model_0' ready"); - ASSERT_TRUE(ready) << "Expect 'model_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - // Request to load "model_0" which should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); -} - -TEST_F(RegisterApiTest, MultiRegister) -{ - // Register / unregister a repository "models_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "models_0"), - "unregistering model repository 'models_0'"); - // Register / unregister "models_0" again - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "models_0"), - "unregistering model repository 'models_0'"); -} - -TEST_F(RegisterApiTest, RegisterMulti2) -{ - // Registering repository "models_0" and "model_1" without mappings, - // there are duplicate models but it won't be checked until load - std::vector name_map; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_1", name_map.data(), name_map.size()), - "registering model repository 'models_1'"); - - // Request to load "model_0" which should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); - // Request to load "model_1" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_1"), - "loading model 'model_1'"); - - // Unregister one of the repos and 'model_0' can be loaded as there is no - // confliction - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "models_1"), - "unregistering model repository 'models_1'"); - // Request to load "model_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - "loading model 'model_0'"); - - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_0", 1, &ready), - "Is 'model_0' ready"); - ASSERT_TRUE(ready) << "Expect 'model_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_1", 3, &ready), - "Is 'model_1' ready"); - ASSERT_TRUE(ready) << "Expect 'model_1' v3 to be ready, model directory is " - "'models_1/model_1'"; -} - -TEST_F(RegisterApiTest, DifferentMapping) -{ - // With register and unregister, user can update a mapping for specific repo. 
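  // (The test below unregisters 'models_0' and re-registers it with a name map, then checks that loads must use the mapped name while the in-memory copy of 'model_0' remains ready.)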
- std::vector override_names{"name_0"}; - std::vector> managed_params; - std::vector name_map; - managed_params.emplace_back( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_names[0].c_str()), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_params.back() != nullptr) - << "failed to create name mapping pair"; - name_map.emplace_back(managed_params.back().get()); - - // First register without mapping - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", nullptr, 0), - "registering model repository 'models_0'"); - // Request to load "model_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - "loading model 'model_0'"); - - // Re-register with mapping - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "models_0"), - "unregistering model repository 'models_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - // Request to load "model_0" will fail, but load "name_0" is okay - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_0"), - "loading model 'name_0'"); - - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_0", 1, &ready), - "Is 'name_0' ready"); - ASSERT_TRUE(ready) << "Expect 'name_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - // Verify that model_0 still exists in-memory - ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "model_0", 1, &ready), - "Is 'model_0' ready"); - ASSERT_TRUE(ready) << "Expect 'model_0' v1 to be ready, model directory is " - "'models_0/model_0'"; -} - -TEST_F(RegisterApiTest, CorrectIndex) -{ - // Registering a repository "models_0" where contains "model_0", but with - // different name mapping - const char* override_name = "name_0"; - std::shared_ptr managed_param( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_name), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_param != nullptr) << "failed to create name mapping pair"; - std::vector name_map{managed_param.get()}; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - - // Request to load "model_0" which should fail - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerLoadModel(server_, "model_0"), - TRITONSERVER_ERROR_INTERNAL, - "failed to load 'model_0', failed to poll from model repository", - "loading model 'model_0'"); - // Request to load "name_0" - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerLoadModel(server_, "name_0"), - "loading model 'name_0'"); - bool ready = false; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIsReady(server_, "name_0", 1, &ready), - "Is 'name_0' v1 ready"); - ASSERT_TRUE(ready) << "Expect 'name_0' v1 to be ready, model directory is " - "'models_0/model_0'"; - - TRITONSERVER_Message* repository_index; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIndex(server_, 1, &repository_index), - "checking model indexes"); - const char* base = nullptr; - size_t byte_size = 0; - FAIL_TEST_IF_ERR( - TRITONSERVER_MessageSerializeToJson(repository_index, &base, &byte_size), - "serializing index to 
Json"); - const std::string search_msg = - "[{\"name\":\"name_0\",\"version\":\"1\",\"state\":\"READY\"}]"; - const std::string serialized_index(base, byte_size); - EXPECT_EQ(serialized_index, search_msg) - << "Returned index does not equal expected index"; -} - -TEST_F(RegisterApiTest, CorrectIndexNotLoaded) -{ - // Registering a repository "models_0" where contains "model_0", but with - // different name mapping - const char* override_name = "name_0"; - std::shared_ptr managed_param( - TRITONSERVER_ParameterNew( - "model_0", TRITONSERVER_PARAMETER_STRING, override_name), - TRITONSERVER_ParameterDelete); - ASSERT_TRUE(managed_param != nullptr) << "failed to create name mapping pair"; - std::vector name_map{managed_param.get()}; - - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "models_0", name_map.data(), name_map.size()), - "registering model repository 'models_0'"); - - TRITONSERVER_Message* repository_index; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerModelIndex(server_, 0, &repository_index), - "checking model indexes"); - const char* base = nullptr; - size_t byte_size = 0; - FAIL_TEST_IF_ERR( - TRITONSERVER_MessageSerializeToJson(repository_index, &base, &byte_size), - "serializing index to Json"); - const std::string search_msg = "[{\"name\":\"name_0\"}]"; - const std::string serialized_index(base, byte_size); - EXPECT_EQ(serialized_index, search_msg) - << "Returned index does not equal expected index"; -} - -// // Test Fixture that runs server with POLLING mode -class PollingRegisterApiTest : public ::testing::Test { - protected: - void SetUp() override - { - // Create running server object. - TRITONSERVER_ServerOptions* server_options = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsNew(&server_options), - "creating server options"); - // Triton expects at least one model repository is set at start, set to - // an empty repository set ModelControlMode to EXPLICIT to avoid attempting - // to load models. 
- FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath( - server_options, "empty_models"), - "setting model repository path"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelControlMode( - server_options, TRITONSERVER_MODEL_CONTROL_POLL), - "setting model control mode"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerNew(&server_, server_options), "creating server"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - ASSERT_TRUE(server_ != nullptr) << "server not created"; - bool live = false; - for (int i = 10; ((i > 0) && !live); --i) { - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerIsLive(server_, &live), "Is server live"); - } - ASSERT_TRUE(live) << "server not live"; - } - - void TearDown() override - { - FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_), "deleting server"); - } - - TRITONSERVER_Server* server_ = nullptr; -}; - -TEST_F(PollingRegisterApiTest, unsupport) -{ - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "empty_models", nullptr, 0), - TRITONSERVER_ERROR_UNSUPPORTED, - "repository registration is not allowed if model control mode is not " - "EXPLICIT", - "registering model repository 'empty_models'"); - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "empty_models"), - TRITONSERVER_ERROR_UNSUPPORTED, - "repository unregistration is not allowed if model control mode is not " - "EXPLICIT", - "unregistering model repository 'empty_models'"); -} - -// Test Fixture that runs server with NONE mode -class NoneRegisterApiTest : public ::testing::Test { - protected: - void SetUp() override - { - // Create running server object. - TRITONSERVER_ServerOptions* server_options = nullptr; - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsNew(&server_options), - "creating server options"); - // Triton expects at least one model repository is set at start, set to - // an empty repository set ModelControlMode to EXPLICIT to avoid attempting - // to load models. 
- FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath( - server_options, "empty_models"), - "setting model repository path"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsSetModelControlMode( - server_options, TRITONSERVER_MODEL_CONTROL_NONE), - "setting model control mode"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerNew(&server_, server_options), "creating server"); - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - ASSERT_TRUE(server_ != nullptr) << "server not created"; - bool live = false; - for (int i = 10; ((i > 0) && !live); --i) { - FAIL_TEST_IF_ERR( - TRITONSERVER_ServerIsLive(server_, &live), "Is server live"); - } - ASSERT_TRUE(live) << "server not live"; - } - - void TearDown() override - { - FAIL_TEST_IF_ERR(TRITONSERVER_ServerDelete(server_), "deleting server"); - } - - TRITONSERVER_Server* server_ = nullptr; -}; - -TEST_F(NoneRegisterApiTest, unsupport) -{ - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerRegisterModelRepository( - server_, "empty_models", nullptr, 0), - TRITONSERVER_ERROR_UNSUPPORTED, - "repository registration is not allowed if model control mode is not " - "EXPLICIT", - "registering model repository 'empty_models'"); - FAIL_TEST_IF_NOT_ERR( - TRITONSERVER_ServerUnregisterModelRepository(server_, "empty_models"), - TRITONSERVER_ERROR_UNSUPPORTED, - "repository unregistration is not allowed if model control mode is not " - "EXPLICIT", - "unregistering model repository 'empty_models'"); -} - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/test/repo_agent_test.cc b/3rdparty/core-r22.12/src/test/repo_agent_test.cc deleted file mode 100644 index 76d72e22db69cb336d4caed4c6f4eb5a94fc53dc..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/repo_agent_test.cc +++ /dev/null @@ -1,2365 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
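The deleted repo_agent_test.cc below tests agent loading against mocked shared libraries rather than real ones. For orientation, a real repository agent is a shared library exporting C entry points such as TRITONREPOAGENT_ModelAction, whose signature appears verbatim in the test's callbacks. The minimal, no-op sketch here shows that shape; the header path is an assumption based on the public Triton core API.

```cpp
// Sketch only: the smallest repository agent the mocked shared library
// stands in for. Real agents export these C entry points from a .so;
// the header path below is assumed, not taken from the deleted test.
#include "triton/core/tritonrepoagent.h"

extern "C" {

TRITONSERVER_Error*
TRITONREPOAGENT_ModelAction(
    TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model,
    const TRITONREPOAGENT_ActionType action_type)
{
  // A no-op agent: accept every lifecycle action (load, unload, ...) as-is.
  return nullptr;  // success
}

}  // extern "C"
```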
-#include "gtest/gtest.h" - -#include -#include -#include -#include -#include -#include -#include -#include "filesystem.h" -#include "repo_agent.h" -#include "server_message.h" -#include "shared_library.h" - -namespace tc = triton::core; - -namespace { - -// -// Duplication of TRITONSERVER_Error implementation -// -class TritonServerError { - public: - static TRITONSERVER_Error* Create( - TRITONSERVER_Error_Code code, const char* msg); - static TRITONSERVER_Error* Create(const tc::Status& status); - - TRITONSERVER_Error_Code Code() const { return code_; } - const std::string& Message() const { return msg_; } - - private: - TritonServerError(TRITONSERVER_Error_Code code, const std::string& msg) - : code_(code), msg_(msg) - { - } - TritonServerError(TRITONSERVER_Error_Code code, const char* msg) - : code_(code), msg_(msg) - { - } - - TRITONSERVER_Error_Code code_; - const std::string msg_; -}; - -TRITONSERVER_Error* -TritonServerError::Create(TRITONSERVER_Error_Code code, const char* msg) -{ - return reinterpret_cast( - new TritonServerError(code, msg)); -} - -TRITONSERVER_Error* -TritonServerError::Create(const tc::Status& status) -{ - // If 'status' is success then return nullptr as that indicates - // success - if (status.IsOk()) { - return nullptr; - } - - return Create( - tc::StatusCodeToTritonCode(status.StatusCode()), - status.Message().c_str()); -} - -class MockSharedLibraryHandle { - public: - bool AddEntryPoint(const std::string& name, void* fn) - { - auto it = entry_points_.find(name); - if (it == entry_points_.end()) { - entry_points_.emplace(name, fn).second; - return true; - } else { - it->second = fn; - return false; - } - } - - bool GetEntryPoint(const std::string& name, void** fn) - { - auto it = entry_points_.find(name); - if (it != entry_points_.end()) { - *fn = it->second; - return true; - } - return false; - } - - private: - std::map entry_points_; -}; - -static std::map global_mock_agents; - -} // namespace - -#ifdef __cplusplus -extern "C" { -#endif - -TRITONSERVER_Error* -TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char* msg) -{ - return reinterpret_cast( - TritonServerError::Create(code, msg)); -} - -void -TRITONSERVER_ErrorDelete(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - delete lerror; -} - -TRITONSERVER_Error_Code -TRITONSERVER_ErrorCode(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - return lerror->Code(); -} - -const char* -TRITONSERVER_ErrorCodeString(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - return tc::Status::CodeString(tc::TritonCodeToStatusCode(lerror->Code())); -} - -const char* -TRITONSERVER_ErrorMessage(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - return lerror->Message().c_str(); -} - -// -// TRITONSERVER_Message -// -TRITONSERVER_Error* -TRITONSERVER_MessageNewFromSerializedJson( - TRITONSERVER_Message** message, const char* base, size_t byte_size) -{ - *message = reinterpret_cast( - new tc::TritonServerMessage({base, byte_size})); - return nullptr; -} - -TRITONSERVER_Error* -TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message* message, const char** base, size_t* byte_size) -{ - tc::TritonServerMessage* lmessage = - reinterpret_cast(message); - lmessage->Serialize(base, byte_size); - return nullptr; // Success -} - -#ifdef __cplusplus -} -#endif - -namespace triton { namespace core { - -Status -SharedLibrary::Acquire(std::unique_ptr* slib) -{ - slib->reset(new 
SharedLibrary()); - return Status::Success; -} - -SharedLibrary::~SharedLibrary() {} -Status -SharedLibrary::SetLibraryDirectory(const std::string& path) -{ - return Status::Success; -} -Status -SharedLibrary::ResetLibraryDirectory() -{ - return Status::Success; -} -Status -SharedLibrary::OpenLibraryHandle(const std::string& path, void** handle) -{ - auto it = global_mock_agents.find(path); - if (it != global_mock_agents.end()) { - *handle = reinterpret_cast(&it->second); - return Status::Success; - } - return Status( - Status::Code::NOT_FOUND, - "unable to load shared library: mock shared library is not set for " - "path " + - path); -} - -Status -SharedLibrary::CloseLibraryHandle(void* handle) -{ - for (auto& global_mock_agent : global_mock_agents) { - if (reinterpret_cast(&global_mock_agent.second) == handle) { - return Status::Success; - } - } - return Status( - Status::Code::NOT_FOUND, - "unable to unload shared library: handle does not matach any mock shared " - "library"); -} - -Status -SharedLibrary::GetEntrypoint( - void* handle, const std::string& name, const bool optional, void** fn) -{ - auto mock_agent = reinterpret_cast(handle); - bool found = mock_agent->GetEntryPoint(name, fn); - if (!optional && !found) { - return Status( - Status::Code::NOT_FOUND, - "unable to find required entrypoint '" + name + "' in shared library"); - } - return Status::Success; -} - -}} // namespace triton::core - -namespace { - -class TritonRepoAgentTest : public ::testing::Test { - protected: - void TearDown() override { global_mock_agents.clear(); } -}; - -TEST_F(TritonRepoAgentTest, Create) -{ - // Set up agent with only action function defined, check agent properties - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t CheckNameModelActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - EXPECT_EQ(lagent->Name(), "minimal_agent") - << "Expect action function is called with minimal agent"; - return nullptr; - }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", - reinterpret_cast(CheckNameModelActionFn)); - global_mock_agents.emplace("minimal_agent_path", agent_handle); - - std::shared_ptr minimal_agent; - auto status = tc::TritonRepoAgent::Create( - "minimal_agent", "minimal_agent_path", &minimal_agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - ASSERT_TRUE(minimal_agent->AgentModelActionFn() != nullptr) - << "Expect action function is provided"; - EXPECT_TRUE(minimal_agent->AgentModelInitFn() == nullptr) - << "Unexpect model init function is provided"; - EXPECT_TRUE(minimal_agent->AgentModelFiniFn() == nullptr) - << "Unexpect model fini function is provided"; - - auto err = minimal_agent->AgentModelActionFn()( - reinterpret_cast(minimal_agent.get()), nullptr, - TRITONREPOAGENT_ACTION_LOAD); - EXPECT_TRUE(err == nullptr) << "Expect successful action function invocation"; -} - -TEST_F(TritonRepoAgentTest, CreateFailInvalidSharedLibrary) -{ - // Passing a agent path that is not in global_mock_agents to - // simulate failure on opening shared library handle - std::shared_ptr invalid_agent; - auto status = tc::TritonRepoAgent::Create( - "invalid_agent", "invalid_agent_path", &invalid_agent); - ASSERT_FALSE(status.IsOk()) << "Unexpect successful agent creation"; - EXPECT_NE( - status.Message().find("unable to load shared library"), 
std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'unable to load shared library...'"; -} - -TEST_F(TritonRepoAgentTest, CreateFailMissingEndpoint) -{ - // Set up agent with nothing defined - auto agent_handle = MockSharedLibraryHandle(); - global_mock_agents.emplace("invalid_agent_path", agent_handle); - - std::shared_ptr invalid_agent; - auto status = tc::TritonRepoAgent::Create( - "invalid_agent", "invalid_agent_path", &invalid_agent); - ASSERT_FALSE(status.IsOk()) << "Unexpect successful agent creation"; - EXPECT_NE( - status.Message().find("unable to find required entrypoint"), - std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'unable to find required entrypoint...'"; -} - -TEST_F(TritonRepoAgentTest, Lifecycle) -{ - // Set up agent with init / fini function defined - tc::TritonRepoAgent::TritonRepoAgentInitFn_t InitFn = - [](TRITONREPOAGENT_Agent* agent) -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - EXPECT_TRUE(lagent->State() == nullptr) - << "Expect agent state is not set before initialization"; - bool* state = new bool(false); - lagent->SetState(reinterpret_cast(state)); - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentFiniFn_t FiniFn = - [](TRITONREPOAGENT_Agent* agent) -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - bool* state = reinterpret_cast(lagent->State()); - EXPECT_TRUE(state != nullptr) << "Expect agent state is set"; - EXPECT_TRUE(*state) << "Expect state is set to true"; - delete state; - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - bool* state = reinterpret_cast(lagent->State()); - EXPECT_TRUE(state != nullptr) << "Expect agent state is set"; - EXPECT_FALSE(*state) << "Expect state is set to false"; - *state = true; - return nullptr; - }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_Initialize", reinterpret_cast(InitFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_Finalize", reinterpret_cast(FiniFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ActionFn)); - global_mock_agents.emplace("agent_path", agent_handle); - - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - ASSERT_TRUE(agent->AgentModelActionFn() != nullptr) - << "Expect action function is provided"; - EXPECT_TRUE(agent->AgentModelInitFn() == nullptr) - << "Unexpect model init function is provided"; - EXPECT_TRUE(agent->AgentModelFiniFn() == nullptr) - << "Unexpect model fini function is provided"; - - auto err = agent->AgentModelActionFn()( - reinterpret_cast(agent.get()), nullptr, - TRITONREPOAGENT_ACTION_LOAD); - EXPECT_TRUE(err == nullptr) << "Expect successful action function invocation"; - // Cause destructor to be called - agent.reset(); -} - -TEST_F(TritonRepoAgentTest, ModelLifecycle) -{ - // Set up agent with model init / fini function defined - tc::TritonRepoAgent::TritonRepoAgentModelInitFn_t InitFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - auto lmodel_state = - reinterpret_cast*, std::future*>*>( - model); - lmodel_state->first->set_value(); - 
return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelFiniFn_t FiniFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - auto lmodel_state = - reinterpret_cast*, std::future*>*>( - model); - lmodel_state->second->get(); - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - auto lmodel_state = - reinterpret_cast*, std::future*>*>( - model); - EXPECT_TRUE(lmodel_state->second->valid()) << "Expect promise value is set"; - return nullptr; - }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelInitialize", reinterpret_cast(InitFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelFinalize", reinterpret_cast(FiniFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ActionFn)); - global_mock_agents.emplace("agent_path", agent_handle); - - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - ASSERT_TRUE(agent->AgentModelActionFn() != nullptr) - << "Expect action function is provided"; - ASSERT_TRUE(agent->AgentModelInitFn() != nullptr) - << "Expect model init function is provided"; - ASSERT_TRUE(agent->AgentModelFiniFn() != nullptr) - << "Expect model fini function is provided"; - - std::promise p; - auto f = p.get_future(); - auto model_state = std::make_pair(&p, &f); - // Simulate the model lifecycle - auto err = agent->AgentModelInitFn()( - reinterpret_cast(agent.get()), - reinterpret_cast(&model_state)); - EXPECT_TRUE(err == nullptr) - << "Expect successful model init function invocation"; - err = agent->AgentModelActionFn()( - reinterpret_cast(agent.get()), - reinterpret_cast(&model_state), - TRITONREPOAGENT_ACTION_LOAD); - EXPECT_TRUE(err == nullptr) << "Expect successful action function invocation"; - err = agent->AgentModelFiniFn()( - reinterpret_cast(agent.get()), - reinterpret_cast(&model_state)); - EXPECT_TRUE(err == nullptr) - << "Expect successful model fini function invocation"; - EXPECT_FALSE(f.valid()) << "Expect future value is retrieved"; -} - -class TritonRepoAgentManagerTest : public ::testing::Test { - public: - static size_t agent_init_counter_; - static size_t agent_fini_counter_; - - protected: - void SetUp() override - { - // Set up agent with init / fini function defined - tc::TritonRepoAgent::TritonRepoAgentInitFn_t InitFn = - [](TRITONREPOAGENT_Agent* agent) -> TRITONSERVER_Error* { - agent_init_counter_++; - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentFiniFn_t FiniFn = - [](TRITONREPOAGENT_Agent* agent) -> TRITONSERVER_Error* { - agent_fini_counter_++; - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) - -> TRITONSERVER_Error* { return nullptr; }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_Initialize", reinterpret_cast(InitFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_Finalize", reinterpret_cast(FiniFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ActionFn)); - - // Reserve valid shared library paths because manager searches the libraries 
- // via the FileSystem API - const tc::FileSystemType type = tc::FileSystemType::LOCAL; - auto status = tc::MakeTemporaryDirectory(type, &root_agent_path_); - ASSERT_TRUE(status.IsOk()) << "TritonRepoAgentManagerTest set up failed: " - "create temporary directory: " - << status.AsString(); - // FIXME make the following platform independent - global_agent_path_ = tc::JoinPath({root_agent_path_, "global"}); - int err = mkdir( - global_agent_path_.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - ASSERT_EQ(err, 0) << "TritonRepoAgentManagerTest set up failed: create " - "global agent directory: " - << err; - const std::set agent_names{"global_agent"}; - for (const auto& agent_name : agent_names) { - auto global_path_to_agent = - tc::JoinPath({global_agent_path_, agent_name}); - auto global_agent = tc::JoinPath( - {global_path_to_agent, tc::TritonRepoAgentLibraryName(agent_name)}); - err = mkdir( - global_path_to_agent.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - ASSERT_EQ(err, 0) << "TritonRepoAgentManagerTest set up failed: create " - "global agent directory: " - << err; - std::ofstream global_agent_file(global_agent); - global_mock_agents.emplace(global_agent, agent_handle); - } - status = - tc::TritonRepoAgentManager::SetGlobalSearchPath(global_agent_path_); - ASSERT_TRUE(status.IsOk()) << "TritonRepoAgentManagerTest set up failed: " - "create temporary directory: " - << status.AsString(); - } - void TearDown() override - { - agent_init_counter_ = 0; - agent_fini_counter_ = 0; - if (!root_agent_path_.empty()) { - // tc::DeleteDirectory(root_agent_path_); - } - global_mock_agents.clear(); - } - - std::string root_agent_path_; - std::string global_agent_path_; - std::string local_agent_path_; -}; -size_t TritonRepoAgentManagerTest::agent_init_counter_ = 0; -size_t TritonRepoAgentManagerTest::agent_fini_counter_ = 0; - -TEST_F(TritonRepoAgentManagerTest, CreateFailureFileNotExist) -{ - // Passing a agent path that is not in global_mock_agents to - // simulate failure on opening shared library handle - std::shared_ptr invalid_agent; - auto status = tc::TritonRepoAgentManager::CreateAgent( - "invalid_agent_name", &invalid_agent); - ASSERT_FALSE(status.IsOk()) << "Unexpect successful agent creation"; - EXPECT_NE(status.Message().find("unable to find"), std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'unable to find...'"; -} - -TEST_F(TritonRepoAgentManagerTest, CreateGlobalAgent) -{ - std::shared_ptr agent; - auto status = tc::TritonRepoAgentManager::CreateAgent("global_agent", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation" << status.AsString(); - agent.reset(); - EXPECT_EQ(agent_init_counter_, (size_t)1) << "Expect 1 agent initialization"; - EXPECT_EQ(agent_fini_counter_, (size_t)1) << "Expect 1 agent finalization"; -} - -TEST_F(TritonRepoAgentManagerTest, AgentPersistence) -{ - std::shared_ptr agent1; - std::shared_ptr agent2; - auto status = - tc::TritonRepoAgentManager::CreateAgent("global_agent", &agent1); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation" << status.AsString(); - EXPECT_EQ(agent_init_counter_, (size_t)1) << "Expect 1 agent initialization"; - EXPECT_EQ(agent_fini_counter_, (size_t)0) << "Expect 0 agent finalization"; - - status = tc::TritonRepoAgentManager::CreateAgent("global_agent", &agent2); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation" << status.AsString(); - EXPECT_EQ(agent_init_counter_, (size_t)1) << "Expect 1 agent initialization"; - 
EXPECT_EQ(agent_fini_counter_, (size_t)0) << "Expect 0 agent finalization"; - - agent1.reset(); - EXPECT_EQ(agent_init_counter_, (size_t)1) << "Expect 1 agent initialization"; - EXPECT_EQ(agent_fini_counter_, (size_t)0) << "Expect 0 agent finalization"; - agent2.reset(); - EXPECT_EQ(agent_init_counter_, (size_t)1) << "Expect 1 agent initialization"; - EXPECT_EQ(agent_fini_counter_, (size_t)1) << "Expect 1 agent finalization"; - - // Create again after all previous agents are reset - status = tc::TritonRepoAgentManager::CreateAgent("global_agent", &agent1); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation" << status.AsString(); - EXPECT_EQ(agent_init_counter_, (size_t)2) << "Expect 2 agent initialization"; - EXPECT_EQ(agent_fini_counter_, (size_t)1) << "Expect 1 agent finalization"; - agent1.reset(); - EXPECT_EQ(agent_init_counter_, (size_t)2) << "Expect 2 agent initialization"; - EXPECT_EQ(agent_fini_counter_, (size_t)2) << "Expect 2 agent finalization"; -} - -class TritonRepoAgentModelTest : public ::testing::Test { - protected: - void SetUp() override - { - simple_config_.set_name("simple_config"); - - // Add a simple agent handle for convinence - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) - -> TRITONSERVER_Error* { return nullptr; }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ActionFn)); - global_mock_agents.emplace("simple_agent_path", agent_handle); - - // Add a agent handle for logging actions of the model - tc::TritonRepoAgent::TritonRepoAgentModelInitFn_t LogInitFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - auto state = reinterpret_cast*>(lagent->State()); - if (state == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "Agent state is not set"); - } - state->emplace_back("Model Initialized"); - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelFiniFn_t LogFiniFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - auto state = reinterpret_cast*>(lagent->State()); - if (state == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "Agent state is not set"); - } - state->emplace_back("Model Finalized"); - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t LogActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) - -> TRITONSERVER_Error* { - auto lagent = reinterpret_cast(agent); - auto state = reinterpret_cast*>(lagent->State()); - if (state == nullptr) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, "Agent state is not set"); - } - state->emplace_back(tc::TRITONREPOAGENT_ActionTypeString(action_type)); - return nullptr; - }; - auto log_agent_handle = MockSharedLibraryHandle(); - log_agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelInitialize", reinterpret_cast(LogInitFn)); - log_agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelFinalize", reinterpret_cast(LogFiniFn)); - log_agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(LogActionFn)); - global_mock_agents.emplace("log_agent_path", log_agent_handle); - } - void TearDown() override { 
global_mock_agents.clear(); } - - TRITONREPOAGENT_ArtifactType original_type_ = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - const std::string original_location_ = "/original"; - inference::ModelConfig simple_config_; -}; - -TEST_F(TritonRepoAgentModelTest, Create) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - EXPECT_EQ(model->Config().name(), simple_config_.name()) - << "Expect the model contains the same config as simple config"; -} - -TEST_F(TritonRepoAgentModelTest, CreateFailure) -{ - // Create agent to be associated with the model, whose model init function - // always returns error - tc::TritonRepoAgent::TritonRepoAgentModelInitFn_t InitFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, "Model initialization error"); - }; - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - return nullptr; - }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelInitialize", reinterpret_cast(InitFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ActionFn)); - global_mock_agents.emplace("agent_path", agent_handle); - - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_FALSE(status.IsOk()) << "Unexpect successful model creation"; - EXPECT_NE( - status.Message().find("Model initialization error"), std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'Model initialization error...'"; -} - -TEST_F(TritonRepoAgentModelTest, Location) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - TRITONREPOAGENT_ArtifactType type; - const char* location; - status = model->Location(&type, &location); - ASSERT_TRUE(status.IsOk()) << "Expect location is returned from Location()"; - EXPECT_EQ(type, original_type_) << "Expect returned original filesystem type"; - EXPECT_EQ(std::string(location), original_location_) - << "Expect returned original location"; -} - -TEST_F(TritonRepoAgentModelTest, SetLocationFailure) -{ - 
// Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - TRITONREPOAGENT_ArtifactType type = TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - const char* location = "/tmp"; - status = model->SetLocation(type, location); - ASSERT_FALSE(status.IsOk()) << "Expect error returned from SetLocation()"; - EXPECT_NE( - status.Message().find( - "location can only be updated during TRITONREPOAGENT_ACTION_LOAD, " - "current action type is not set"), - std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'location can only be updated during " - "TRITONREPOAGENT_ACTION_LOAD, current action type is not set'"; -} - -TEST_F(TritonRepoAgentModelTest, SetLocation) -{ - static const TRITONREPOAGENT_ArtifactType new_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - static const std::string new_location = "/new_location"; - - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - // Advance the model lifecycle to be able to set location - status = model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD); - EXPECT_TRUE(status.IsOk()) - << "Expect successful agent invocation with TRITONREPOAGENT_ACTION_LOAD"; - status = model->SetLocation(new_type, new_location); - ASSERT_TRUE(status.IsOk()) - << "Expect successful SetLocation() after invoking agent with " - "TRITONREPOAGENT_ACTION_LOAD"; - TRITONREPOAGENT_ArtifactType type = original_type_; - const char* location = original_location_.c_str(); - status = model->Location(&type, &location); - ASSERT_TRUE(status.IsOk()) << "Expect location is returned from Location()"; - EXPECT_EQ(type, new_type) << "Expect returned filesystem type is " - << tc::TRITONREPOAGENT_ArtifactTypeString(new_type); - EXPECT_EQ(std::string(location), new_location) - << "Expect returned location is " << new_location; -} - -TEST_F(TritonRepoAgentModelTest, SetLocationWrongActionFailure) -{ - static const TRITONREPOAGENT_ArtifactType new_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - static const std::string new_location = "/new_location"; - - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - // Advance the model lifecycle to be able to set 
location - status = model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD); - EXPECT_TRUE(status.IsOk()) - << "Expect successful agent invocation with TRITONREPOAGENT_ACTION_LOAD"; - status = model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD_COMPLETE); - EXPECT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - "TRITONREPOAGENT_ACTION_LOAD_COMPLETE"; - status = model->SetLocation(new_type, new_location); - ASSERT_FALSE(status.IsOk()) << "Expect error returned from SetLocation()"; - EXPECT_NE( - status.Message().find( - "location can only be updated during TRITONREPOAGENT_ACTION_LOAD, " - "current action type is TRITONREPOAGENT_ACTION_LOAD_COMPLETE"), - std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'location can only be updated during " - "TRITONREPOAGENT_ACTION_LOAD, current action type is " - "TRITONREPOAGENT_ACTION_LOAD_COMPLETE'"; -} - -TEST_F(TritonRepoAgentModelTest, SetLocationViaAgent) -{ - static const TRITONREPOAGENT_ArtifactType new_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - static const std::string new_location = "/new_location"; - // Create agent to be associated with the model - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - auto lmodel = reinterpret_cast(model); - auto status = lmodel->SetLocation(new_type, new_location); - return reinterpret_cast( - TritonServerError::Create(status)); - }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ActionFn)); - global_mock_agents.emplace("set_location_agent_path", agent_handle); - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "set_location_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - // Advance the model lifecycle to be able to set location - status = model->InvokeAgent(TRITONREPOAGENT_ACTION_LOAD); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent invocation with TRITONREPOAGENT_ACTION_LOAD"; - TRITONREPOAGENT_ArtifactType type = original_type_; - const char* location = original_location_.c_str(); - status = model->Location(&type, &location); - ASSERT_TRUE(status.IsOk()) << "Expect location is returned from Location()"; - EXPECT_EQ(type, new_type) << "Expect returned filesystem type is " - << tc::TRITONREPOAGENT_ArtifactTypeString(new_type); - EXPECT_EQ(std::string(location), new_location) - << "Expect returned location is " << new_location; -} - -TEST_F(TritonRepoAgentModelTest, DeleteLocationBeforeAcquire) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - - 
status = model->DeleteMutableLocation(); - ASSERT_FALSE(status.IsOk()) - << "Expect error returned from DeleteMutableLocation()"; - EXPECT_NE( - status.Message().find("No mutable location to be deleted"), - std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'No mutable location to be deleted'"; -} - -TEST_F(TritonRepoAgentModelTest, AcquireLocalLocationAndDelete) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - const char* acquired_location; - status = model->AcquireMutableLocation( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, &acquired_location); - ASSERT_TRUE(status.IsOk()) - << "Expect successful location acquisition: " << status.AsString(); - - // Check directory - bool is_dir = false; - status = tc::IsDirectory(acquired_location, &is_dir); - ASSERT_TRUE(status.IsOk()) - << "Expect location proprety can be checked: " << status.AsString(); - EXPECT_TRUE(is_dir) << "Expect a directory is returned as mutable location"; - tc::FileSystemType type = tc::FileSystemType::LOCAL; - status = tc::GetFileSystemType(acquired_location, &type); - ASSERT_TRUE(status.IsOk()) - << "Expect location filesystem type can be checked: " - << status.AsString(); - EXPECT_EQ(type, tc::FileSystemType::LOCAL) - << "Expect a local mutable location is acquired"; - - status = model->DeleteMutableLocation(); - ASSERT_TRUE(status.IsOk()) - << "Expect successful location deletion: " << status.AsString(); - // Check directory - bool exists = true; - status = tc::FileExists(acquired_location, &exists); - ASSERT_TRUE(status.IsOk()) - << "Expect location proprety can be checked: " << status.AsString(); - EXPECT_FALSE(exists) << "Expect the mutable location no longer exists"; -} - -TEST_F(TritonRepoAgentModelTest, AcquireLocalLocationTwice) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - - const char* acquired_location; - status = model->AcquireMutableLocation( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, &acquired_location); - ASSERT_TRUE(status.IsOk()) - << "Expect successful location acquisition: " << status.AsString(); - - // Acquire the same type again - const char* second_acquired_location; - status = model->AcquireMutableLocation( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, &second_acquired_location); - ASSERT_TRUE(status.IsOk()) - << "Expect successful location acquisition: " << status.AsString(); - EXPECT_EQ( - std::string(acquired_location), std::string(second_acquired_location)) - << "Expect the same location is returned"; -} - -TEST_F(TritonRepoAgentModelTest, DeleteTwiceAfterAcquire) -{ - // Create agent to be 
associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - const char* acquired_location; - status = model->AcquireMutableLocation( - TRITONREPOAGENT_ARTIFACT_FILESYSTEM, &acquired_location); - ASSERT_TRUE(status.IsOk()) - << "Expect successful location acquisition: " << status.AsString(); - - status = model->DeleteMutableLocation(); - ASSERT_TRUE(status.IsOk()) - << "Expect successful location deletion: " << status.AsString(); - status = model->DeleteMutableLocation(); - ASSERT_FALSE(status.IsOk()) - << "Expect error returned from DeleteMutableLocation()"; - EXPECT_NE( - status.Message().find("No mutable location to be deleted"), - std::string::npos) - << "Unexpect error message: '" << status.Message() - << "', expect 'No mutable location to be deleted'"; -} - -TEST_F(TritonRepoAgentModelTest, AcquireRemoteLocation) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - - const char* acquired_location; - status = model->AcquireMutableLocation( - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM, &acquired_location); - ASSERT_FALSE(status.IsOk()) - << "Expect error returned from AcquireMutableLocation()"; - const std::string search_msg = - "Unexpected artifact type, expects 'TRITONREPOAGENT_ARTIFACT_FILESYSTEM'"; - EXPECT_NE(status.Message().find(search_msg), std::string::npos) - << "Unexpect error message: '" << status.Message() << "', expect '" - << search_msg << "'"; -} - -TEST_F(TritonRepoAgentModelTest, AgentParameters) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - tc::TritonRepoAgent::Parameters expected_params{{"key_a", "value_b"}, - {"key_b", "value_b"}}; - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - expected_params, &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - auto agent_params = model->AgentParameters(); - ASSERT_EQ(agent_params.size(), expected_params.size()); - for (size_t idx = 0; idx < agent_params.size(); ++idx) { - EXPECT_EQ(agent_params[idx].first, expected_params[idx].first); - EXPECT_EQ(agent_params[idx].second, expected_params[idx].second); - } -} - -TEST_F(TritonRepoAgentModelTest, State) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = - tc::TritonRepoAgent::Create("agent", "simple_agent_path", &agent); - 
ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - - // Create model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - auto state = model->State(); - ASSERT_TRUE(state == nullptr) << "Expect state is not set"; - bool state_value = true; - model->SetState(reinterpret_cast(&state_value)); - state = model->State(); - ASSERT_TRUE(state != nullptr) << "Expect state is set"; - EXPECT_EQ(*reinterpret_cast(state), state_value) - << "Expect state value is true"; -} - -TEST_F(TritonRepoAgentModelTest, EmptyLifeCycle) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "log_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::vector log; - agent->SetState(reinterpret_cast(&log)); - - // Create and destroy model - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - model.reset(); - - // Check log - ASSERT_EQ(log.size(), (size_t)2) - << "Expect 2 state of model lifecycle is logged, got " << log.size(); - EXPECT_EQ(log[0], "Model Initialized"); - EXPECT_EQ(log[1], "Model Finalized"); -} - -TEST_F(TritonRepoAgentModelTest, HalfLifeCycle) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "log_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::vector log; - agent->SetState(reinterpret_cast(&log)); - - std::unique_ptr model; - // Create and destroy model in situations that a full lifecycle should run - std::vector> situations{ - {TRITONREPOAGENT_ACTION_LOAD}, - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_FAIL}}; - std::vector expected_log{ - "Model Initialized", "TRITONREPOAGENT_ACTION_LOAD", - "TRITONREPOAGENT_ACTION_LOAD_FAIL", "Model Finalized"}; - for (const auto& situation : situations) { - log.clear(); - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : situation) { - status = model->InvokeAgent(action); - EXPECT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - model.reset(); - - // Check log - ASSERT_EQ(log.size(), expected_log.size()) - << "Expect " << expected_log.size() - << " state of model lifecycle is logged, got " << log.size(); - for (size_t i = 0; i < log.size(); ++i) { - EXPECT_EQ(log[i], expected_log[i]); - } - } -} - -TEST_F(TritonRepoAgentModelTest, FullLifeCycle) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "log_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::vector log; - agent->SetState(reinterpret_cast(&log)); - - 
std::unique_ptr model; - // Create and destroy model in situations that a full lifecycle should run - std::vector> situations{ - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_COMPLETE}, - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_COMPLETE, - TRITONREPOAGENT_ACTION_UNLOAD}, - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_COMPLETE, - TRITONREPOAGENT_ACTION_UNLOAD, TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE}}; - std::vector expected_log{ - "Model Initialized", - "TRITONREPOAGENT_ACTION_LOAD", - "TRITONREPOAGENT_ACTION_LOAD_COMPLETE", - "TRITONREPOAGENT_ACTION_UNLOAD", - "TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE", - "Model Finalized"}; - for (const auto& situation : situations) { - log.clear(); - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : situation) { - status = model->InvokeAgent(action); - EXPECT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - model.reset(); - - // Check log - ASSERT_EQ(log.size(), expected_log.size()) - << "Expect " << expected_log.size() - << " state of model lifecycle is logged, got " << log.size(); - for (size_t i = 0; i < log.size(); ++i) { - EXPECT_EQ(log[i], expected_log[i]); - } - } -} - -TEST_F(TritonRepoAgentModelTest, WrongLifeCycle) -{ - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "log_agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::vector log; - agent->SetState(reinterpret_cast(&log)); - - // Create model and run all action combinations - std::vector> valid_lifecycles{ - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_FAIL}, - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_COMPLETE, - TRITONREPOAGENT_ACTION_UNLOAD, TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE}}; - std::vector available_actions{ - TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_FAIL, - TRITONREPOAGENT_ACTION_LOAD_COMPLETE, TRITONREPOAGENT_ACTION_UNLOAD, - TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE}; - std::map> - valid_actions{{TRITONREPOAGENT_ACTION_LOAD, - {TRITONREPOAGENT_ACTION_LOAD_FAIL, - TRITONREPOAGENT_ACTION_LOAD_COMPLETE}}, - {TRITONREPOAGENT_ACTION_LOAD_FAIL, {}}, - {TRITONREPOAGENT_ACTION_LOAD_COMPLETE, - {TRITONREPOAGENT_ACTION_UNLOAD}}, - {TRITONREPOAGENT_ACTION_UNLOAD, - {TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE}}, - {TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE, {}}}; - for (const auto& valid_lifecycle : valid_lifecycles) { - log.clear(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (size_t idx = 0; idx < valid_lifecycle.size(); ++idx) { - const auto next_lifecycle_action = valid_lifecycle[idx]; - // Handle the first action specially - if (idx == 0) { - for (const auto action : available_actions) { - if (action == valid_lifecycle[0]) { - continue; - } - status = model->InvokeAgent(action); - if (status.IsOk()) { - for (const auto& state_log : log) { - EXPECT_TRUE(false) << state_log; - } - } - ASSERT_FALSE(status.IsOk()) - << 
"Unexpect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action); - } - status = model->InvokeAgent(valid_lifecycle[0]); - if (!status.IsOk()) { - for (const auto& state_log : log) { - EXPECT_TRUE(false) << state_log; - } - } - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(next_lifecycle_action) - << ": " << status.AsString(); - continue; - } - const auto& current_valid_actions = - valid_actions[valid_lifecycle[idx - 1]]; - for (const auto action : available_actions) { - if (current_valid_actions.find(action) != current_valid_actions.end()) { - continue; - } - status = model->InvokeAgent(action); - if (status.IsOk()) { - for (const auto& state_log : log) { - EXPECT_TRUE(false) << state_log; - } - } - ASSERT_FALSE(status.IsOk()) - << "Unexpect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action); - } - status = model->InvokeAgent(next_lifecycle_action); - if (!status.IsOk()) { - for (const auto& state_log : log) { - EXPECT_TRUE(false) << state_log; - } - } - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(next_lifecycle_action) << ": " - << status.AsString(); - } - } -} - -class TritonRepoAgentAPITest : public ::testing::Test { - public: - static std::function agent_init_fn_; - static std::function agent_fini_fn_; - static std::function - model_init_fn_; - static std::function - model_action_fn_; - static std::function - model_fini_fn_; - - protected: - void SetUp() override - { - simple_config_.set_name("simple_config"); - // Add a agent handle for flexible testing - tc::TritonRepoAgent::TritonRepoAgentInitFn_t AgentInitFn = - [](TRITONREPOAGENT_Agent* agent) -> TRITONSERVER_Error* { - if (agent_init_fn_ != nullptr) { - agent_init_fn_(agent); - } - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentFiniFn_t AgentFiniFn = - [](TRITONREPOAGENT_Agent* agent) -> TRITONSERVER_Error* { - if (agent_fini_fn_ != nullptr) { - agent_fini_fn_(agent); - } - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelInitFn_t ModelInitFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - if (model_init_fn_ != nullptr) { - model_init_fn_(agent, model); - } - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ModelActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) - -> TRITONSERVER_Error* { - if (model_action_fn_ != nullptr) { - model_action_fn_(agent, model); - } - return nullptr; - }; - tc::TritonRepoAgent::TritonRepoAgentModelFiniFn_t ModelFiniFn = - [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) -> TRITONSERVER_Error* { - if (model_fini_fn_ != nullptr) { - model_fini_fn_(agent, model); - } - return nullptr; - }; - auto agent_handle = MockSharedLibraryHandle(); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_Initialize", reinterpret_cast(AgentInitFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_Finalize", reinterpret_cast(AgentFiniFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelInitialize", - reinterpret_cast(ModelInitFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ModelActionFn)); - agent_handle.AddEntryPoint( - "TRITONREPOAGENT_ModelFinalize", reinterpret_cast(ModelFiniFn)); - global_mock_agents.emplace("agent_path", agent_handle); - } - void TearDown() override 
- { - global_mock_agents.clear(); - agent_init_fn_ = nullptr; - agent_fini_fn_ = nullptr; - model_init_fn_ = nullptr; - model_action_fn_ = nullptr; - model_fini_fn_ = nullptr; - } - - TRITONREPOAGENT_ArtifactType original_type_ = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - const std::string original_location_ = "/original"; - inference::ModelConfig simple_config_; - - std::vector> valid_lifecycles_{ - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_FAIL}, - {TRITONREPOAGENT_ACTION_LOAD, TRITONREPOAGENT_ACTION_LOAD_COMPLETE, - TRITONREPOAGENT_ACTION_UNLOAD, TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE}}; -}; - -std::function - TritonRepoAgentAPITest::agent_init_fn_ = nullptr; -std::function - TritonRepoAgentAPITest::agent_fini_fn_ = nullptr; -std::function - TritonRepoAgentAPITest::model_init_fn_ = nullptr; -std::function - TritonRepoAgentAPITest::model_action_fn_ = nullptr; -std::function - TritonRepoAgentAPITest::model_fini_fn_ = nullptr; - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ApiVersion) -{ - agent_init_fn_ = - [](TRITONREPOAGENT_Agent* agent) { - uint32_t major = 0; - uint32_t minor = 0; - auto err = TRITONREPOAGENT_ApiVersion(&major, &minor); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ApiVersion() invokation: " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(major, (uint32_t)TRITONREPOAGENT_API_VERSION_MAJOR) - << "Unexpected major veresion"; - EXPECT_EQ(minor, (uint32_t)TRITONREPOAGENT_API_VERSION_MINOR) - << "Unexpected major veresion"; - } - }; - agent_fini_fn_ = agent_init_fn_; - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - uint32_t major = 0; - uint32_t minor = 0; - auto err = TRITONREPOAGENT_ApiVersion(&major, &minor); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ApiVersion() invokation: " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(major, (uint32_t)TRITONREPOAGENT_API_VERSION_MAJOR) - << "Unexpected major veresion"; - EXPECT_EQ(minor, (uint32_t)TRITONREPOAGENT_API_VERSION_MINOR) - << "Unexpected major veresion"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelRepositoryLocation) -{ - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM; - const char* location = nullptr; - auto err = TRITONREPOAGENT_ModelRepositoryLocation( - agent, model, &artifact_type, &location); - if (err != nullptr) { - 
EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelRepositoryLocation(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(artifact_type, TRITONREPOAGENT_ARTIFACT_FILESYSTEM) - << "Unexpected artifact type"; - EXPECT_EQ(std::string(location), "/original") - << "Unexpected location"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F( - TritonRepoAgentAPITest, - TRITONREPOAGENT_ModelRepositoryLocationAcquireRemote) -{ - model_init_fn_ = [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) { - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM; - const char* location = nullptr; - auto err = TRITONREPOAGENT_ModelRepositoryLocationAcquire( - agent, model, artifact_type, &location); - if (err != nullptr) { - const std::string err_msg = TRITONSERVER_ErrorMessage(err); - const std::string search_msg = - "Unexpected artifact type, expects " - "'TRITONREPOAGENT_ARTIFACT_FILESYSTEM'"; - EXPECT_NE(err_msg.find(search_msg), std::string::npos) - << "Unexpect error message: '" << err_msg << "', expect '" - << search_msg << "'"; - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(false) << "Expect error returned from " - "TRITONREPOAGENT_ModelRepositoryLocationAcquire()"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelRepositoryLocationAcquire) -{ - model_init_fn_ = [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) { - // Acquire, acquire (same), release - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - const char* location = nullptr; - auto err = TRITONREPOAGENT_ModelRepositoryLocationAcquire( - agent, model, artifact_type, 
&location); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful " - "TRITONREPOAGENT_ModelRepositoryLocationAcquire(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - - std::string acquired_location = location; - err = TRITONREPOAGENT_ModelRepositoryLocationAcquire( - agent, model, artifact_type, &location); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful " - "TRITONREPOAGENT_ModelRepositoryLocationAcquire(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(acquired_location, std::string(location)) - << "Expect the same location is acquired"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelRepositoryLocationRelease) -{ - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - // relase (fail), acquire, release - const char* location = "nonexisting_location"; - auto err = TRITONREPOAGENT_ModelRepositoryLocationRelease( - agent, model, location); - if (err != nullptr) { - const std::string search_msg = "No mutable location to be deleted"; - const std::string err_msg = TRITONSERVER_ErrorMessage(err); - EXPECT_NE(err_msg.find(search_msg), std::string::npos) - << "Unexpect error message: '" << err_msg << "', expect '" - << search_msg << "'"; - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(false) - << "Expect error returned from " - "TRITONREPOAGENT_ModelRepositoryLocationRelease()"; - } - - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - err = TRITONREPOAGENT_ModelRepositoryLocationAcquire( - agent, model, artifact_type, &location); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelRepositoryLocation(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - - err = TRITONREPOAGENT_ModelRepositoryLocationRelease( - agent, model, location); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelRepositoryLocation(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, 
original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelRepositoryUpdate) -{ - static std::string current_location = original_location_; - static TRITONREPOAGENT_ArtifactType current_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - std::string new_location = current_location + "_new"; - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - const char* location = new_location.c_str(); - auto err = TRITONREPOAGENT_ModelRepositoryUpdate( - agent, model, artifact_type, location); - if (err != nullptr) { - const std::string search_msg = - "location can only be updated during TRITONREPOAGENT_ACTION_LOAD"; - const std::string err_msg = TRITONSERVER_ErrorMessage(err); - EXPECT_NE(err_msg.find(search_msg), std::string::npos) - << "Unexpect error message: '" << err_msg << "', expect '" - << search_msg << "...'"; - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(false) << "Expect error returned from " - "TRITONREPOAGENT_ModelRepositoryUpdate()"; - } - - // Check location shouldn't be changed - err = TRITONREPOAGENT_ModelRepositoryLocation( - agent, model, &artifact_type, &location); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelRepositoryLocation(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(artifact_type, current_type) << "Unexpected artifact type"; - EXPECT_EQ(std::string(location), current_location) - << "Unexpected location"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - // Overriding the model action function in agent handle because the action - // type needs to be checked here - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ModelActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - std::string new_location = current_location + "_new"; - TRITONREPOAGENT_ArtifactType artifact_type = - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM; - const char* location = new_location.c_str(); - auto err = TRITONREPOAGENT_ModelRepositoryUpdate( - agent, model, artifact_type, location); - if (action_type == TRITONREPOAGENT_ACTION_LOAD) { - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelRepositoryUpdate(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - current_location = new_location; - current_type = artifact_type; - } - } else { - if (err != nullptr) { - const std::string search_msg = - "location can only be updated during TRITONREPOAGENT_ACTION_LOAD"; - const std::string err_msg = TRITONSERVER_ErrorMessage(err); - EXPECT_NE(err_msg.find(search_msg), std::string::npos) - << "Unexpect error message: '" << err_msg << "', expect '" - << search_msg << "...'"; - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(false) << "Expect error returned from " - "TRITONREPOAGENT_ModelRepositoryUpdate()"; - } - } - - // Check location - err = 
TRITONREPOAGENT_ModelRepositoryLocation( - agent, model, &artifact_type, &location); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelRepositoryLocation(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(artifact_type, current_type) << "Unexpected artifact type"; - EXPECT_EQ(std::string(location), current_location) - << "Unexpected location"; - } - return nullptr; - }; - global_mock_agents["agent_path"].AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ModelActionFn)); - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Reset location and type - current_location = original_location_; - current_type = TRITONREPOAGENT_ARTIFACT_FILESYSTEM; - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, current_location, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelParameter) -{ - static tc::TritonRepoAgent::Parameters expected_params{{"key_a", "value_a"}, - {"key_b", "value_b"}}; - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - uint32_t count; - auto err = TRITONREPOAGENT_ModelParameterCount(agent, model, &count); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelParameterCount(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(count, expected_params.size()); - } - - const char* parameter_name = nullptr; - const char* parameter_value = nullptr; - for (size_t idx = 0; idx < count; ++idx) { - err = TRITONREPOAGENT_ModelParameter( - agent, model, idx, ¶meter_name, ¶meter_value); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelParameter(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_EQ(std::string(parameter_name), expected_params[idx].first); - EXPECT_EQ( - std::string(parameter_value), expected_params[idx].second); - } - } - // out of range - err = TRITONREPOAGENT_ModelParameter( - agent, model, count, ¶meter_name, ¶meter_value); - if (err != nullptr) { - const std::string search_msg = - "index out of range for model parameters"; - const std::string err_msg = TRITONSERVER_ErrorMessage(err); - EXPECT_NE(err_msg.find(search_msg), std::string::npos) - << "Unexpect error message: '" << err_msg << "', expect '" - << search_msg << "...'"; - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(false) - << "Expect error returned from TRITONREPOAGENT_ModelParameter()"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", 
"agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - expected_params, &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelConfig) -{ - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - TRITONSERVER_Message* config = nullptr; - auto err = TRITONREPOAGENT_ModelConfig(agent, model, 1, &config); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelConfig(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - const char* base = nullptr; - size_t byte_size = 0; - err = TRITONSERVER_MessageSerializeToJson(config, &base, &byte_size); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONSERVER_MessageSerializeToJson(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - const std::string search_msg = "simple_config"; - const std::string serialized_config(base, byte_size); - EXPECT_NE(serialized_config.find(search_msg), std::string::npos) - << "Expect finding '" << search_msg - << "' in returned config: " << serialized_config; - } - - // unsupport version - err = TRITONREPOAGENT_ModelConfig(agent, model, 2, &config); - if (err != nullptr) { - const std::string search_msg = - "model configuration version 2 not supported, supported versions " - "are: 1"; - const std::string err_msg = TRITONSERVER_ErrorMessage(err); - EXPECT_NE(err_msg.find(search_msg), std::string::npos) - << "Unexpect error message: '" << err_msg << "', expect '" - << search_msg << "...'"; - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(false) - << "Expect error returned from TRITONREPOAGENT_ModelConfig()"; - } - }; - model_action_fn_ = model_init_fn_; - model_fini_fn_ = model_init_fn_; - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_ModelState) -{ - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - size_t* state = nullptr; - auto err = - TRITONREPOAGENT_ModelState(model, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelState(): " - << TRITONSERVER_ErrorMessage(err); - 
TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(state == nullptr) << "Expect state is not set"; - } - state = new size_t(0); - err = TRITONREPOAGENT_ModelSetState( - model, reinterpret_cast(state)); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelSetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - delete state; - } - }; - model_fini_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - size_t* state = nullptr; - auto err = - TRITONREPOAGENT_ModelState(model, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(state != nullptr) << "Expect state is set"; - EXPECT_EQ(*state, size_t(0)); - } - - // Sanity check that set state works elsewhere - size_t* new_state = new size_t(*state); - delete state; - err = TRITONREPOAGENT_ModelSetState( - model, reinterpret_cast(new_state)); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelSetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - - // Delete state before end of model lifecycle - delete new_state; - }; - // Overriding the model action function in agent handle because the action - // type needs to be checked here - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ModelActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - size_t* state = nullptr; - auto err = - TRITONREPOAGENT_ModelState(model, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_ModelState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - EXPECT_TRUE(state != nullptr) << "Expect state is set"; - switch (action_type) { - case TRITONREPOAGENT_ACTION_LOAD: { - EXPECT_EQ(*state, size_t(0)); - ++*state; - break; - } - case TRITONREPOAGENT_ACTION_LOAD_COMPLETE: { - EXPECT_EQ(*state, size_t(1)); - ++*state; - break; - } - case TRITONREPOAGENT_ACTION_LOAD_FAIL: { - EXPECT_EQ(*state, size_t(1)); - --*state; - break; - } - case TRITONREPOAGENT_ACTION_UNLOAD: { - EXPECT_EQ(*state, size_t(2)); - --*state; - break; - } - case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: { - EXPECT_EQ(*state, size_t(1)); - --*state; - break; - } - } - - // Sanity check that set state works elsewhere - size_t* new_state = new size_t(*state); - delete state; - err = TRITONREPOAGENT_ModelSetState( - model, reinterpret_cast(new_state)); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelSetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - delete new_state; - } - return nullptr; - }; - global_mock_agents["agent_path"].AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ModelActionFn)); - - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: " << status.AsString(); - std::unique_ptr model; - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - 
ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - for (const auto action : lifecycle) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) - << ": " << status.AsString(); - } - } -} - -TEST_F(TritonRepoAgentAPITest, TRITONREPOAGENT_AgentState) -{ - // Two models share one agent, check if agent state is properly shared - agent_init_fn_ = [](TRITONREPOAGENT_Agent* agent) { - size_t* state = nullptr; - auto err = TRITONREPOAGENT_State(agent, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_State(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(state == nullptr) << "Expect state is not set"; - } - state = new size_t(0); - err = TRITONREPOAGENT_SetState(agent, reinterpret_cast(state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_SetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - delete state; - } - }; - agent_fini_fn_ = [](TRITONREPOAGENT_Agent* agent) { - size_t* state = nullptr; - auto err = TRITONREPOAGENT_State(agent, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_State(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(state != nullptr) << "Expect state is set"; - EXPECT_EQ(*state, size_t(0)); - } - - // Sanity check that set state works elsewhere - size_t* new_state = new size_t(*state); - delete state; - err = TRITONREPOAGENT_SetState(agent, reinterpret_cast(new_state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_SetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - - // Delete state before end of agent lifecycle - delete new_state; - }; - model_init_fn_ = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model) { - size_t* state = nullptr; - auto err = - TRITONREPOAGENT_State(agent, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_State(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(state != nullptr) << "Expect state is set"; - } - - // Agent state maybe 0 or 1 depending on the order of model lifecycle, - // record that in model state to keep track of the order - if ((*state == 0) || (*state == 1)) { - size_t* model_state = new size_t(*state); - err = TRITONREPOAGENT_ModelSetState( - model, reinterpret_cast(model_state)); - if (err != nullptr) { - EXPECT_TRUE(false) - << "Expect successful TRITONREPOAGENT_ModelSetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - } else { - EXPECT_TRUE(false) << "Expect agent state is either 0 or 1"; - } - - // Sanity check that set state works elsewhere - ++*state; - size_t* new_state = new size_t(*state); - delete state; - err = - TRITONREPOAGENT_SetState(agent, reinterpret_cast(new_state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_SetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - delete new_state; - } - }; - model_fini_fn_ = [](TRITONREPOAGENT_Agent* agent, - TRITONREPOAGENT_AgentModel* model) { - size_t* model_state = nullptr; - auto err = TRITONREPOAGENT_ModelState( - model, 
reinterpret_cast(&model_state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_ModelState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(model_state != nullptr) << "Expect state is set"; - } - - size_t* state = nullptr; - err = TRITONREPOAGENT_State(agent, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_State(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(state != nullptr) << "Expect state is set"; - EXPECT_EQ(*state, size_t(2) - *model_state); - } - - // Sanity check that set state works elsewhere - --*state; - size_t* new_state = new size_t(*state); - delete state; - err = TRITONREPOAGENT_SetState(agent, reinterpret_cast(new_state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_SetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - delete new_state; - } - - // Delete state before end of model lifecycle - delete model_state; - }; - // Overriding the model action function in agent handle because the action - // type needs to be checked here - tc::TritonRepoAgent::TritonRepoAgentModelActionFn_t ModelActionFn = - [](TRITONREPOAGENT_Agent* agent, TRITONREPOAGENT_AgentModel* model, - const TRITONREPOAGENT_ActionType action_type) -> TRITONSERVER_Error* { - size_t* model_state = nullptr; - auto err = TRITONREPOAGENT_ModelState( - model, reinterpret_cast(&model_state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_ModelState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } else { - EXPECT_TRUE(model_state != nullptr) << "Expect state is set"; - } - - size_t* state = nullptr; - err = TRITONREPOAGENT_State(agent, reinterpret_cast(&state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_State(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - } - EXPECT_TRUE(state != nullptr) << "Expect state is set"; - switch (action_type) { - case TRITONREPOAGENT_ACTION_LOAD: { - EXPECT_EQ(*state, size_t(2) + *model_state); - ++*state; - break; - } - case TRITONREPOAGENT_ACTION_LOAD_COMPLETE: { - EXPECT_EQ(*state, size_t(4) + *model_state); - ++*state; - break; - } - case TRITONREPOAGENT_ACTION_LOAD_FAIL: { - EXPECT_EQ(*state, size_t(4) - *model_state); - --*state; - break; - } - case TRITONREPOAGENT_ACTION_UNLOAD: { - EXPECT_EQ(*state, size_t(6) - *model_state); - --*state; - break; - } - case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: { - EXPECT_EQ(*state, size_t(4) - *model_state); - --*state; - break; - } - } - - // Sanity check that set state works elsewhere - size_t* new_state = new size_t(*state); - delete state; - err = TRITONREPOAGENT_SetState(agent, reinterpret_cast(new_state)); - if (err != nullptr) { - EXPECT_TRUE(false) << "Expect successful TRITONREPOAGENT_SetState(): " - << TRITONSERVER_ErrorMessage(err); - TRITONSERVER_ErrorDelete(err); - delete new_state; - } - return nullptr; - }; - global_mock_agents["agent_path"].AddEntryPoint( - "TRITONREPOAGENT_ModelAction", reinterpret_cast(ModelActionFn)); - - - const auto lifecycles = valid_lifecycles_; - for (const auto& lifecycle : lifecycles) { - // Create agent to be associated with the model - std::shared_ptr agent; - auto status = tc::TritonRepoAgent::Create("agent", "agent_path", &agent); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent creation: 
" << status.AsString(); - std::vector> models(2); - for (auto& model : models) { - status = tc::TritonRepoAgentModel::Create( - original_type_, original_location_, simple_config_, agent, - tc::TritonRepoAgent::Parameters(), &model); - ASSERT_TRUE(status.IsOk()) - << "Expect successful model creation: " << status.AsString(); - } - for (const auto action : lifecycle) { - for (auto& model : models) { - status = model->InvokeAgent(action); - ASSERT_TRUE(status.IsOk()) - << "Expect successful agent invocation with " - << tc::TRITONREPOAGENT_ActionTypeString(action) << ": " - << status.AsString(); - } - } - } -} - -} // namespace - -int -main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/test/response_cache_test.cc b/3rdparty/core-r22.12/src/test/response_cache_test.cc deleted file mode 100644 index 4662f07b440f54c59d197f10c1f81ce8a294d443..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/test/response_cache_test.cc +++ /dev/null @@ -1,981 +0,0 @@ -// Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-#include "gtest/gtest.h" - -#include -#include "memory.h" -#include "response_cache.h" -#include "triton/common/logging.h" - -namespace tc = triton::core; - -/* Mock classes for Unit Testing */ -namespace triton { namespace core { - -// -// InferenceResponseFactory -// -Status -InferenceResponseFactory::CreateResponse( - std::unique_ptr* response) const -{ - response->reset(new InferenceResponse( - model_, id_, allocator_, alloc_userp_, response_fn_, response_userp_, - response_delegator_)); - - return Status::Success; -} - -// -// InferenceRequest -// -InferenceRequest::InferenceRequest( - Model* model, const int64_t requested_model_version) - : needs_normalization_(true), model_raw_(model), - requested_model_version_(requested_model_version), flags_(0), - correlation_id_(0), batch_size_(0), timeout_us_(0), collect_stats_(true) -{ - // Unit test doesn't need actual response factory logic - // or other priority/request_counting logic, it just needs - // a non-null reponse factory object. - response_factory_.reset(new InferenceResponseFactory()); -} - -InferenceRequest::Input::Input( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count) - : name_(name), datatype_(datatype), - original_shape_(shape, shape + dim_count), is_shape_tensor_(false), - data_(new MemoryReference), has_host_policy_specific_data_(false) -{ -} - -// Use const global var as locals can't be returned in ModelName(), -// and we don't care about the model for the unit test -const std::string MODEL = "model"; - -const std::string& -InferenceRequest::ModelName() const -{ - return MODEL; -} - -int64_t -InferenceRequest::ActualModelVersion() const -{ - // Not using model in unit test mock - return requested_model_version_; -} - -Status -InferenceRequest::PrepareForInference() -{ - // Remove override inputs as those are added during any previous - // inference execution. - inputs_.clear(); - override_inputs_.clear(); - - // Initially show the actual inputs to be only the original - // inputs. If overrides are added later they will be added to - // 'inputs_'. 
- for (auto& pr : original_inputs_) { - inputs_.emplace(std::make_pair(pr.first, std::addressof(pr.second))); - } - - // Clear the timestamps - queue_start_ns_ = 0; -#ifdef TRITON_ENABLE_STATS - request_start_ns_ = 0; -#endif // TRITON_ENABLE_STATS - - return Status::Success; -} - -Status -InferenceRequest::Input::DataBuffer( - const size_t idx, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) const -{ - *base = data_->BufferAt(idx, byte_size, memory_type, memory_type_id); - - return Status::Success; -} - -Status -InferenceRequest::AddOriginalInput( - const std::string& name, const inference::DataType datatype, - const int64_t* shape, const uint64_t dim_count, - InferenceRequest::Input** input) -{ - const auto& pr = original_inputs_.emplace( - std::piecewise_construct, std::forward_as_tuple(name), - std::forward_as_tuple(name, datatype, shape, dim_count)); - if (!pr.second) { - return Status( - Status::Code::INVALID_ARG, - "input '" + name + "' already exists in request"); - } - - if (input != nullptr) { - *input = std::addressof(pr.first->second); - } - - needs_normalization_ = true; - return Status::Success; -} - -Status -InferenceRequest::AddOriginalInput( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, InferenceRequest::Input** input) -{ - return AddOriginalInput(name, datatype, &shape[0], shape.size(), input); -} - -Status -InferenceRequest::Input::AppendData( - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - if (byte_size > 0) { - std::static_pointer_cast(data_)->AddBuffer( - static_cast(base), byte_size, memory_type, memory_type_id); - } - - return Status::Success; -} - -// -// InferenceResponse -// - -InferenceResponse::InferenceResponse( - const std::shared_ptr& model, const std::string& id, - const ResponseAllocator* allocator, void* alloc_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp, - const std::function< - void(std::unique_ptr&&, const uint32_t)>& delegator) - : model_(model), id_(id), allocator_(allocator), alloc_userp_(alloc_userp), - response_fn_(response_fn), response_userp_(response_userp), - response_delegator_(delegator), null_response_(false) -{ - // Skip allocator logic / references in unit test -} - -std::ostream& -operator<<(std::ostream& out, const InferenceResponse& response) -{ - out << "[0x" << std::addressof(response) << "] " - << "response id: " << response.Id() << std::endl; - - out << "status:" << response.ResponseStatus().AsString() << std::endl; - - return out; -} - -InferenceResponse::Output::~Output() -{ - Status status = ReleaseDataBuffer(); - if (!status.IsOk()) { - std::cerr << "[ERROR] failed to release buffer for output '" << name_ - << "': " << status.AsString(); - } -} - -Status -InferenceResponse::Output::ReleaseDataBuffer() -{ - if (allocated_buffer_ != nullptr) { - free(allocated_buffer_); - } - - allocated_buffer_ = nullptr; - buffer_attributes_.SetByteSize(0); - buffer_attributes_.SetMemoryType(TRITONSERVER_MEMORY_CPU); - buffer_attributes_.SetMemoryTypeId(0); - allocated_userp_ = nullptr; - - return Status::Success; -} - -// Same as defined in infer_response.cc -Status -InferenceResponse::Output::DataBuffer( - const void** buffer, size_t* buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, - void** userp) const -{ - *buffer = allocated_buffer_; - *buffer_byte_size = buffer_attributes_.ByteSize(); - *memory_type = 
buffer_attributes_.MemoryType(); - *memory_type_id = buffer_attributes_.MemoryTypeId(); - *userp = allocated_userp_; - return Status::Success; -} - -// Simplified version of AllocateDataBuffer for CPU memory only -Status -InferenceResponse::Output::AllocateDataBuffer( - void** buffer, size_t buffer_byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id) -{ - if (allocated_buffer_ != nullptr) { - return Status( - Status::Code::ALREADY_EXISTS, - "allocated buffer for output '" + name_ + "' already exists"); - } - - // Simplifications - CPU memory only for now - if (*memory_type != TRITONSERVER_MEMORY_CPU || *memory_type_id != 0) { - return Status( - Status::Code::INTERNAL, "Only standard CPU memory supported for now"); - } - - // Allocate buffer to copy to - *buffer = malloc(buffer_byte_size); - if (buffer == nullptr || *buffer == nullptr) { - return Status( - Status::Code::INTERNAL, "buffer was nullptr in AllocateDataBuffer"); - } - - // Set relevant member variables for DataBuffer() to return - allocated_buffer_ = *buffer; - buffer_attributes_.SetByteSize(buffer_byte_size); - buffer_attributes_.SetMemoryType(*memory_type); - buffer_attributes_.SetMemoryTypeId(*memory_type_id); - allocated_userp_ = nullptr; - return Status::Success; -} - -Status -InferenceResponse::AddOutput( - const std::string& name, const inference::DataType datatype, - const std::vector& shape, InferenceResponse::Output** output) -{ - outputs_.emplace_back(name, datatype, shape, allocator_, alloc_userp_); - - if (output != nullptr) { - *output = std::addressof(outputs_.back()); - } - - return Status::Success; -} - -InferenceRequest::SequenceId::SequenceId() - : sequence_label_(""), sequence_index_(0), - id_type_(InferenceRequest::SequenceId::DataType::UINT64) -{ -} - -InferenceRequest::SequenceId::SequenceId(const std::string& sequence_label) - : sequence_label_(sequence_label), sequence_index_(0), - id_type_(InferenceRequest::SequenceId::DataType::STRING) -{ -} - -InferenceRequest::SequenceId::SequenceId(uint64_t sequence_index) - : sequence_label_(""), sequence_index_(sequence_index), - id_type_(InferenceRequest::SequenceId::DataType::UINT64) -{ -} - -}} // namespace triton::core - - -namespace { - -// Helpers -void -check_status(tc::Status status) -{ - ASSERT_TRUE(status.IsOk()) << "ERROR: " << status.Message(); -} - -void -cache_stats(std::unique_ptr& cache) -{ - std::cout << "Cache entries: " << cache->NumEntries() << std::endl; - std::cout << "Cache evictions: " << cache->NumEvictions() << std::endl; - std::cout << "Cache free bytes: " << cache->FreeBytes() << std::endl; - std::cout << "Cache alloc'd bytes: " << cache->AllocatedBytes() << std::endl; - std::cout << "Cache total bytes: " << cache->TotalBytes() << std::endl; -} - -void -reset_response( - std::unique_ptr* response, - tc::InferenceRequest* request) -{ - check_status(request->ResponseFactory()->CreateResponse(response)); -} - -// Only support 1-Dimensional data to keep it simple -struct Tensor { - std::string name; - std::vector data; -}; - -// Only support 1-Dimensional data to keep it simple -std::unique_ptr -GenerateResponse( - const tc::InferenceRequest* request, inference::DataType dtype, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, - const std::vector& outputs) -{ - std::cout << "Create response object" << std::endl; - std::unique_ptr response; - check_status(request->ResponseFactory()->CreateResponse(&response)); - - std::cout << "Add output metadata to response object" << std::endl; - for (const auto& tensor 
: outputs) { - if (tensor.data.size() == 0) { - std::cout << "[ERROR] Can't generate a request with no output data" - << std::endl; - return nullptr; - } - - tc::InferenceResponse::Output* response_output = nullptr; - std::vector shape{1, -1}; - shape[1] = tensor.data.size(); - uint64_t output_size = sizeof(tensor.data[0]) * tensor.data.size(); - std::cout << "Output size bytes: " << output_size << std::endl; - check_status( - response->AddOutput(tensor.name, dtype, shape, &response_output)); - - std::cout << "Allocate output data buffer for response object" << std::endl; - void* buffer; - check_status(response_output->AllocateDataBuffer( - &buffer, output_size, &memory_type, &memory_type_id)); - if (buffer == nullptr) { - std::cout << "[ERROR] buffer was nullptr;" << std::endl; - return nullptr; - } - // Copy data from output to response buffer - std::memcpy(buffer, tensor.data.data(), output_size); - } - - return response; -} - -// Only support 1-Dimensional data to keep it simple -tc::InferenceRequest* -GenerateRequest( - tc::Model* model, uint64_t model_version, inference::DataType dtype, - TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, - const std::vector& inputs, const std::string& request_id) -{ - auto request = new tc::InferenceRequest(model, model_version); - for (const auto& tensor : inputs) { - if (tensor.data.size() == 0) { - std::cout << "[ERROR] Can't generate a request with no input data" - << std::endl; - return nullptr; - } - - tc::InferenceRequest::Input* request_input = nullptr; - std::vector shape{1, -1}; - shape[1] = tensor.data.size(); - request->AddOriginalInput(tensor.name, dtype, shape, &request_input); - if (request_input == nullptr) { - std::cout << "[ERROR] request_input was nullptr" << std::endl; - return nullptr; - } - - uint64_t input_size = sizeof(tensor.data[0]) * tensor.data.size(); - request_input->AppendData( - tensor.data.data(), input_size, memory_type, memory_type_id); - } - // PrepareForInference for use of ImmutableInputs() - check_status(request->PrepareForInference()); - request->SetId(request_id); // for debugging purposes - return request; -} - -// Test Fixture -class RequestResponseCacheTest : public ::testing::Test { - protected: - void SetUp() override - { - // Sample input data - data0 = {1, 2, 3, 4}; - data1 = {5, 6, 7, 8}; - - // Sample input vectors - inputs0 = std::vector{{"input", data0}}; - inputs1 = std::vector{{"input", data1}}; - inputs2 = std::vector{{"input", data1}}; - inputs3 = std::vector{{"input0", data0}, {"input1", data1}}; - inputs4 = std::vector{{"input1", data1}, {"input0", data0}}; - - // Create three requests with same input name, two with same data, one with - // different data - request0 = GenerateRequest( - model, model_version, dtype, memory_type, memory_type_id, inputs0, - "request0"); - request1 = GenerateRequest( - model, model_version, dtype, memory_type, memory_type_id, inputs1, - "request1"); - request2 = GenerateRequest( - model, model_version, dtype, memory_type, memory_type_id, inputs2, - "request2"); - // Create two requests with the same two inputs but inserted in different - // order - request3 = GenerateRequest( - model, model_version, dtype, memory_type, memory_type_id, inputs3, - "request3"); - request4 = GenerateRequest( - model, model_version, dtype, memory_type, memory_type_id, inputs4, - "request4"); - // Verify requests were created correctly - ASSERT_NE(request0, nullptr); - ASSERT_NE(request1, nullptr); - ASSERT_NE(request2, nullptr); - ASSERT_NE(request3, nullptr); - 
ASSERT_NE(request4, nullptr); - - // Generate a set of unique requests to use for parallelism tests - for (size_t idx = 0; idx < thread_count; idx++) { - std::vector data(thread_count, static_cast(idx)); - std::vector inputs{Tensor{"input" + std::to_string(idx), data}}; - - std::string request_id = "unique" + std::to_string(idx); - std::cout << "Generating request: " << request_id << std::endl; - auto request = GenerateRequest( - model, model_version, dtype, memory_type, memory_type_id, inputs, - request_id); - ASSERT_NE(request, nullptr); - unique_requests.emplace_back(request); - } - ASSERT_EQ(unique_requests.size(), thread_count); - - // Sample outputs - Tensor output_tensor0 = {"output", data0}; - output0_size = sizeof(int) * data0.size(); - outputs0 = std::vector{output_tensor0}; - // Response of 100 ints, taking ~400 bytes at a time - data100 = std::vector(100, 0); - Tensor output_tensor100 = {"output", data100}; - outputs100 = std::vector{output_tensor100}; - - // Sample responses - response0 = GenerateResponse( - request0, dtype, memory_type, memory_type_id, outputs0); - ASSERT_NE(response0, nullptr); - response_400bytes = GenerateResponse( - request0, dtype, memory_type, memory_type_id, outputs100); - ASSERT_NE(response_400bytes, nullptr); - } - - void TearDown() override - { - delete request0; - delete request1; - delete request2; - delete request3; - delete request4; - for (auto r : unique_requests) { - delete r; - } - } - - public: - tc::Model* model = nullptr; - uint64_t model_version = 1; - inference::DataType dtype = inference::DataType::TYPE_INT32; - TRITONSERVER_MemoryType memory_type = TRITONSERVER_MEMORY_CPU; - int64_t memory_type_id = 0; - size_t thread_count = 10; - uint64_t output0_size; - - std::vector data0, data1, data100; - std::vector inputs0, inputs1, inputs2, inputs3, inputs4, inputs100; - std::vector outputs0, outputs100; - tc::InferenceRequest *request0, *request1, *request2, *request3, *request4; - std::vector unique_requests; - std::unique_ptr response0, response_400bytes; -}; - -// Test hashing for consistency on same request -TEST_F(RequestResponseCacheTest, TestHashing) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size = 4 * 1024 * 1024; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); - - // Compare hashes - std::cout << "Compare hashes" << std::endl; - check_status(cache->HashAndSet(request0)); - check_status(cache->HashAndSet(request1)); - check_status(cache->HashAndSet(request2)); - check_status(cache->HashAndSet(request3)); - check_status(cache->HashAndSet(request4)); - - std::cout << "request0->CacheKey(): " << request0->CacheKey() << std::endl; - std::cout << "request1->CacheKey(): " << request1->CacheKey() << std::endl; - std::cout << "request2->CacheKey(): " << request2->CacheKey() << std::endl; - std::cout << "request3->CacheKey(): " << request3->CacheKey() << std::endl; - std::cout << "request4->CacheKey(): " << request4->CacheKey() << std::endl; - // Different input data should have different hashes - ASSERT_NE(request0->CacheKey(), request1->CacheKey()); - // Same input data should have same hashes - ASSERT_EQ(request1->CacheKey(), request2->CacheKey()); - // Two requests with same two inputs but added in different orders - ASSERT_EQ(request3->CacheKey(), request4->CacheKey()); -} - - -// Test cache size too large to initialize. 
-TEST_F(RequestResponseCacheTest, TestCacheSizeTooLarge)
-{
-  // Pick an intentionally large cache size, expecting failure
-  constexpr uint64_t cache_size = ULLONG_MAX;
-  std::cout << "Create cache of size: " << cache_size << std::endl;
-  std::unique_ptr<tc::RequestResponseCache> cache;
-  auto status = tc::RequestResponseCache::Create(cache_size, &cache);
-  ASSERT_FALSE(status.IsOk()) << "Creating cache of size " << cache_size
-                              << " succeeded when it should fail.";
-}
-
-// Test cache size too small to initialize.
-// See the following boost code for reference:
-// -
-// https://github.com/boostorg/interprocess/blob/41018201d6b7a34f38a0303a1ad591d978989cb8/include/boost/interprocess/managed_external_buffer.hpp#L75-L77
-// -
-// https://github.com/boostorg/interprocess/blob/41018201d6b7a34f38a0303a1ad591d978989cb8/include/boost/interprocess/detail/managed_memory_impl.hpp#L172-L174
-TEST_F(RequestResponseCacheTest, TestCacheSizeTooSmall)
-{
-  // Pick an intentionally small cache size, expecting failure
-  constexpr uint64_t cache_size = 1;
-  std::cout << "Create cache of size: " << cache_size << std::endl;
-  std::unique_ptr<tc::RequestResponseCache> cache;
-  auto status = tc::RequestResponseCache::Create(cache_size, &cache);
-  ASSERT_FALSE(status.IsOk()) << "Creating cache of size " << cache_size
-                              << " succeeded when it should fail.";
-}
-
-// Test cache too small for entry
-TEST_F(RequestResponseCacheTest, TestCacheSizeSmallerThanEntry)
-{
-  // Create cache
-  constexpr uint64_t cache_size = 1024;
-  std::cout << "Create cache of size: " << cache_size << std::endl;
-  std::unique_ptr<tc::RequestResponseCache> cache;
-  tc::RequestResponseCache::Create(cache_size, &cache);
-
-  // Set output data to be larger than the cache size
-  // NOTE: The data is not merely 1 byte larger than cache_size; using
-  // cache_size + 1 elements makes it clear the entry is always larger than
-  // the cache even if the dtype is changed.
- std::vector large_data(cache_size + 1, 0); - std::cout << "Create large_response (larger than cache) of size: " - << large_data.size() << std::endl; - std::vector large_outputs{Tensor{"output", large_data}}; - auto large_response = GenerateResponse( - request0, dtype, memory_type, memory_type_id, large_outputs); - - std::cout << "Insert large_response into cache" << std::endl; - auto status = cache->Insert(*large_response, request0); - // We expect insertion to fail here since cache is too small - std::cout << status.Message() << std::endl; - ASSERT_FALSE(status.IsOk()) - << "Inserting item larger than cache succeeded when it should fail"; -} - -// Test hashing for consistency on same request -TEST_F(RequestResponseCacheTest, TestEviction) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size = 1024; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); - cache_stats(cache); - - std::cout << "Lookup unique_requests[0] in empty cache" << std::endl; - auto status = cache->Lookup(nullptr, unique_requests[0]); - // This hash not in cache yet - ASSERT_FALSE(status.IsOk()) - << "hash [" + std::to_string(unique_requests[0]->CacheKey()) + - "] should not be in cache"; - std::cout << "Insert response into cache" << std::endl; - check_status(cache->Insert(*response_400bytes, unique_requests[0])); - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), 1u); - ASSERT_EQ(cache->NumEvictions(), 0u); - - check_status(cache->Insert(*response_400bytes, unique_requests[1])); - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), 2u); - ASSERT_EQ(cache->NumEvictions(), 0u); - - check_status(cache->Insert(*response_400bytes, unique_requests[2])); - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), 2u); - ASSERT_EQ(cache->NumEvictions(), 1u); - - check_status(cache->Insert(*response_400bytes, unique_requests[3])); - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), 2u); - ASSERT_EQ(cache->NumEvictions(), 2u); -} - -// Test inserting into cache with multiple threads in parallel -// and asserting that the correct number of entries and evictions -// occurred based on cache and entry sizes -TEST_F(RequestResponseCacheTest, TestParallelInsertion) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size = 1024; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); - cache_stats(cache); - - // Create threads - std::vector threads; - std::cout << "Insert responses into cache with [" << thread_count - << "] threads in parallel" << std::endl; - for (size_t idx = 0; idx < thread_count; idx++) { - threads.emplace_back(std::thread( - &tc::RequestResponseCache::Insert, cache.get(), - std::ref(*response_400bytes), unique_requests[idx])); - } - - // Join threads - for (size_t idx = 0; idx < thread_count; idx++) { - std::cout << "Joining idx: " << idx << std::endl; - threads[idx].join(); - } - - // Cache size only has room for 2 entries of 100 ints, so we expect 2 entries - // and N-2 evictions for N threads - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), 2u) << "NumEntries: " << cache->NumEntries(); - ASSERT_EQ(cache->NumEvictions(), (uint64_t)(thread_count - 2u)) - << "NumEvictions: " << cache->NumEvictions(); -} - -// Test evicting from cache with multiple threads in parallel -// and asserting that the correct number of entries and evictions -// occurred -TEST_F(RequestResponseCacheTest, TestParallelEviction) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size 
= 1024; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); - cache_stats(cache); - - // Create threads - std::vector threads; - - // Insert [thread_count] entries into cache sequentially - for (size_t idx = 0; idx < thread_count; idx++) { - cache->Insert(*response0, unique_requests[idx]); - } - - // Assert all entries were put into cache and no evictions occurred yet - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), (uint64_t)thread_count) - << "NumEntries: " << cache->NumEntries(); - ASSERT_EQ(cache->NumEvictions(), 0u) - << "NumEvictions: " << cache->NumEvictions(); - - // Evict [thread_count] entries from cache in parallel - std::cout << "Evict from cache with [" << thread_count - << "] threads in parallel" << std::endl; - for (size_t idx = 0; idx < thread_count; idx++) { - threads.emplace_back( - std::thread(&tc::RequestResponseCache::Evict, cache.get())); - } - - // Join threads - for (size_t idx = 0; idx < thread_count; idx++) { - threads[idx].join(); - } - - // Assert all entries were evicted from cache and exactly [thread_count] - // evictions occurred - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), 0u) << "NumEntries: " << cache->NumEntries(); - ASSERT_EQ(cache->NumEvictions(), (uint64_t)thread_count) - << "NumEvictions: " << cache->NumEvictions(); -} - -// Test LRU ordering of cache -TEST_F(RequestResponseCacheTest, TestLRU) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size = 1024; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); - cache_stats(cache); - - // Insert 3 items into cache: 0, 1, 2 - check_status(cache->Insert(*response0, unique_requests[0])); - check_status(cache->Insert(*response0, unique_requests[1])); - check_status(cache->Insert(*response0, unique_requests[2])); - - // Verify items 0, 1, 2, in cache - reset_response(&response0, unique_requests[0]); - check_status(cache->Lookup(response0.get(), unique_requests[0])); - reset_response(&response0, unique_requests[1]); - check_status(cache->Lookup(response0.get(), unique_requests[1])); - reset_response(&response0, unique_requests[2]); - check_status(cache->Lookup(response0.get(), unique_requests[2])); - - // Evict item from cache, should be item 0 since it was looked up last - cache->Evict(); - // Assert Lookup for item 0 fails but items 1, 2 succeed - tc::Status status; - reset_response(&response0, unique_requests[0]); - status = cache->Lookup(response0.get(), unique_requests[0]); - ASSERT_FALSE(status.IsOk()); - reset_response(&response0, unique_requests[1]); - check_status(cache->Lookup(response0.get(), unique_requests[1])); - reset_response(&response0, unique_requests[2]); - check_status(cache->Lookup(response0.get(), unique_requests[2])); - - // Insert item 3, 4 - check_status(cache->Insert(*response0, unique_requests[3])); - check_status(cache->Insert(*response0, unique_requests[4])); - - // Evict twice, assert items 1 and 2 were evicted - cache->Evict(); - cache->Evict(); - reset_response(&response0, unique_requests[1]); - status = cache->Lookup(response0.get(), unique_requests[1]); - ASSERT_FALSE(status.IsOk()); - reset_response(&response0, unique_requests[2]); - status = cache->Lookup(response0.get(), unique_requests[2]); - ASSERT_FALSE(status.IsOk()); - - // Lookup items 3 and 4 - reset_response(&response0, unique_requests[3]); - check_status(cache->Lookup(response0.get(), unique_requests[3])); - reset_response(&response0, unique_requests[4]); - check_status(cache->Lookup(response0.get(), 
unique_requests[4])); - - // Evict, assert item 3 was evicted - cache->Evict(); - reset_response(&response0, unique_requests[3]); - status = cache->Lookup(response0.get(), unique_requests[3]); - ASSERT_FALSE(status.IsOk()); - reset_response(&response0, unique_requests[4]); - check_status(cache->Lookup(response0.get(), unique_requests[4])); -} - -// Test looking up from cache with multiple threads in parallel -// and asserting the responses were populated correctly -TEST_F(RequestResponseCacheTest, TestParallelLookup) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size = 1024; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); - cache_stats(cache); - - // Create threads - std::vector threads; - std::vector> responses; - - // Insert [thread_count] entries into cache sequentially - for (size_t idx = 0; idx < thread_count; idx++) { - // Create response for each thread to fill from cache - std::unique_ptr response; - check_status( - unique_requests[idx]->ResponseFactory()->CreateResponse(&response)); - responses.push_back(std::move(response)); - // Insert response for each thread - cache->Insert(*response0, unique_requests[idx]); - } - - // Assert all entries were put into cache and no evictions occurred yet - cache_stats(cache); - ASSERT_EQ(cache->NumEntries(), (uint64_t)thread_count) - << "NumEntries: " << cache->NumEntries(); - ASSERT_EQ(cache->NumEvictions(), 0u) - << "NumEvictions: " << cache->NumEvictions(); - - // Lookup [thread_count] entries from cache in parallel - std::cout << "Lookup from cache with [" << thread_count - << "] threads in parallel" << std::endl; - for (size_t idx = 0; idx < thread_count; idx++) { - threads.emplace_back(std::thread( - &tc::RequestResponseCache::Lookup, cache.get(), responses[idx].get(), - unique_requests[idx])); - } - - // Join threads - for (size_t idx = 0; idx < thread_count; idx++) { - threads[idx].join(); - } - - // Grab output from sample response for comparison - const auto& response0_output = response0->Outputs()[0]; - - // Verify output results from cache - for (size_t idx = 0; idx < thread_count; idx++) { - // Fetch output buffer details - const void* response_buffer = nullptr; - size_t response_byte_size = 0; - TRITONSERVER_MemoryType response_memory_type; - int64_t response_memory_type_id; - void* userp; - - // TODO: Handle multiple outputs more generically - const auto& response_test = responses[idx]; - for (const auto& response_test_output : response_test->Outputs()) { - ASSERT_EQ(response_test_output.Name(), response0_output.Name()); - ASSERT_EQ(response_test_output.DType(), response0_output.DType()); - ASSERT_EQ(response_test_output.Shape(), response0_output.Shape()); - check_status(response_test_output.DataBuffer( - &response_buffer, &response_byte_size, &response_memory_type, - &response_memory_type_id, &userp)); - - // TODO: Use Triton DType to cast buffer and compare outputs generically - int* cache_output = (int*)response_buffer; - std::cout << "Check output buffer data from cache entry for thread [" - << idx << "]:" << std::endl; - for (size_t i = 0; i < response_byte_size / sizeof(int); i++) { - std::cout << cache_output[i] << " == " << data0[i] << std::endl; - ASSERT_EQ(cache_output[i], data0[i]); - } - } - } -} - -// Test end-to-end flow of cache -TEST_F(RequestResponseCacheTest, TestEndToEnd) -{ - // Create cache - std::cout << "Create cache" << std::endl; - uint64_t cache_size = 256; - std::unique_ptr cache; - tc::RequestResponseCache::Create(cache_size, &cache); 
-  cache_stats(cache);
-
-  std::cout << "Lookup request0 in empty cache" << std::endl;
-  auto status = cache->Lookup(nullptr, request0);
-  // This hash is not in the cache yet
-  ASSERT_FALSE(status.IsOk()) << "hash [" +
-                                     std::to_string(request0->CacheKey()) +
-                                     "] should not be in cache";
-  std::cout << "Insert response into cache with request0" << std::endl;
-  // Insertion should succeed
-  check_status(cache->Insert(*response0, request0));
-  cache_stats(cache);
-
-  // Check cache stats
-  auto total_lookup_latency = cache->TotalLookupLatencyNs();
-  auto total_insertion_latency = cache->TotalInsertionLatencyNs();
-  std::cout << "Total lookup latency: " << total_lookup_latency << std::endl;
-  std::cout << "Total insertion latency: " << total_insertion_latency
-            << std::endl;
-  ASSERT_TRUE(total_lookup_latency > 0)
-      << "ERROR: Total lookup latency should be non-zero";
-  ASSERT_TRUE(total_insertion_latency > 0)
-      << "ERROR: Total insertion latency should be non-zero";
-
-  // Duplicate insertion should fail since request0 already exists in cache
-  status = cache->Insert(*response0, request0);
-  ASSERT_FALSE(status.IsOk())
-      << "Inserting duplicate item in cache should fail";
-
-  // Create response to test cache lookup
-  std::cout << "Create response object to fill from cache" << std::endl;
-  std::unique_ptr<tc::InferenceResponse> response_test;
-  check_status(request0->ResponseFactory()->CreateResponse(&response_test));
-
-  // Lookup should now succeed
-  std::cout << "Lookup request0 in cache after insertion" << std::endl;
-  check_status(cache->Lookup(response_test.get(), request0));
-
-  // Check cache stats again
-  auto total_lookup_latency2 = cache->TotalLookupLatencyNs();
-  auto total_insertion_latency2 = cache->TotalInsertionLatencyNs();
-  std::cout << "Total lookup latency2: " << total_lookup_latency2 << std::endl;
-  std::cout << "Total insertion latency2: " << total_insertion_latency2
-            << std::endl;
-  ASSERT_TRUE(total_lookup_latency2 > total_lookup_latency)
-      << "ERROR: Total lookup latency should increase";
-  ASSERT_TRUE(total_insertion_latency2 > total_insertion_latency)
-      << "ERROR: Total insertion latency should increase";
-
-  // Grab output from sample response for comparison
-  const auto& response0_output = response0->Outputs()[0];
-
-  // Fetch output buffer details
-  const void* response_buffer = nullptr;
-  size_t response_byte_size = 0;
-  TRITONSERVER_MemoryType response_memory_type;
-  int64_t response_memory_type_id;
-  void* userp;
-  // TODO: How to handle different memory types? GPU vs CPU vs Pinned, etc.
- // TODO: Handle multiple outputs more generically - for (const auto& response_test_output : response_test->Outputs()) { - ASSERT_EQ(response_test_output.Name(), response0_output.Name()); - ASSERT_EQ(response_test_output.DType(), response0_output.DType()); - ASSERT_EQ(response_test_output.Shape(), response0_output.Shape()); - check_status(response_test_output.DataBuffer( - &response_buffer, &response_byte_size, &response_memory_type, - &response_memory_type_id, &userp)); - } - - // TODO: Use Triton DType to cast buffer and compare outputs generically - int* cache_output = (int*)response_buffer; - std::cout << "Check output buffer data from cache entry:" << std::endl; - for (size_t i = 0; i < response_byte_size / sizeof(int); i++) { - std::cout << cache_output[i] << " == " << outputs0[0].data[i] << std::endl; - ASSERT_EQ(cache_output[i], outputs0[0].data[i]); - } - - // Simple Evict() test - ASSERT_EQ(cache->NumEntries(), 1u); - ASSERT_EQ(cache->NumEvictions(), 0u); - cache->Evict(); - ASSERT_EQ(cache->NumEntries(), 0u); - ASSERT_EQ(cache->NumEvictions(), 1u); - std::cout << "Done!" << std::endl; -} - -} // namespace - -int -main(int argc, char** argv) -{ -#ifdef TRITON_ENABLE_LOGGING - LOG_SET_VERBOSE(1); -#endif // TRITON_ENABLE_LOGGING - - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/3rdparty/core-r22.12/src/tritonserver.cc b/3rdparty/core-r22.12/src/tritonserver.cc deleted file mode 100644 index bdce0346525815f40233482f881e0ba9294d9e3e..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/tritonserver.cc +++ /dev/null @@ -1,3066 +0,0 @@ -// Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include -#include -#include "buffer_attributes.h" -#include "cuda_utils.h" -#include "infer_parameter.h" -#include "infer_request.h" -#include "infer_response.h" -#include "infer_stats.h" -#include "metric_family.h" -#include "metrics.h" -#include "model.h" -#include "model_config_utils.h" -#include "model_repository_manager.h" -#include "rate_limiter.h" -#include "response_allocator.h" -#include "server.h" -#include "server_message.h" -#include "status.h" -#include "triton/common/logging.h" -#include "triton/common/model_config.h" -#include "triton/common/nvtx.h" -#include "triton/common/table_printer.h" -#include "triton/common/triton_json.h" -#include "tritonserver_apis.h" - -// For unknown reason, windows will not export some functions declared -// with dllexport in tritonrepoagent.h and tritonbackend.h. To get -// those functions exported it is (also?) necessary to mark the -// definitions in this file with dllexport as well. The TRITONSERVER_* -// functions are getting exported but for consistency adding the -// declspec to these definitions as well. -#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -namespace tc = triton::core; - -namespace { - -std::string -ResourceString(const std::string& name, const int count, const int device_id) -{ - return std::string( - "{\"name\":\"" + name + "\", \"count\":" + std::to_string(count) + - " \"device\":" + std::to_string(device_id) + "}"); -} - -std::string -RateLimitModeToString(const tc::RateLimitMode rate_limit_mode) -{ - std::string rl_mode_str(""); - switch (rate_limit_mode) { - case tc::RateLimitMode::RL_EXEC_COUNT: { - rl_mode_str = "EXEC_COUNT"; - break; - } - case tc::RateLimitMode::RL_OFF: { - rl_mode_str = "OFF"; - break; - } - } - return rl_mode_str; -} - -// -// TritonServerError -// -// Implementation for TRITONSERVER_Error. 
-// -class TritonServerError { - public: - static TRITONSERVER_Error* Create( - TRITONSERVER_Error_Code code, const char* msg); - static TRITONSERVER_Error* Create( - TRITONSERVER_Error_Code code, const std::string& msg); - static TRITONSERVER_Error* Create(const tc::Status& status); - - TRITONSERVER_Error_Code Code() const { return code_; } - const std::string& Message() const { return msg_; } - - private: - TritonServerError(TRITONSERVER_Error_Code code, const std::string& msg) - : code_(code), msg_(msg) - { - } - TritonServerError(TRITONSERVER_Error_Code code, const char* msg) - : code_(code), msg_(msg) - { - } - - TRITONSERVER_Error_Code code_; - const std::string msg_; -}; - -TRITONSERVER_Error* -TritonServerError::Create(TRITONSERVER_Error_Code code, const char* msg) -{ - return reinterpret_cast( - new TritonServerError(code, msg)); -} - -TRITONSERVER_Error* -TritonServerError::Create(TRITONSERVER_Error_Code code, const std::string& msg) -{ - return reinterpret_cast( - new TritonServerError(code, msg)); -} - -TRITONSERVER_Error* -TritonServerError::Create(const tc::Status& status) -{ - // If 'status' is success then return nullptr as that indicates - // success - if (status.IsOk()) { - return nullptr; - } - - return Create( - tc::StatusCodeToTritonCode(status.StatusCode()), status.Message()); -} - -#define RETURN_IF_STATUS_ERROR(S) \ - do { \ - const tc::Status& status__ = (S); \ - if (!status__.IsOk()) { \ - return TritonServerError::Create(status__); \ - } \ - } while (false) - -// -// TritonServerMetrics -// -// Implementation for TRITONSERVER_Metrics. -// -class TritonServerMetrics { - public: - TritonServerMetrics() = default; - TRITONSERVER_Error* Serialize(const char** base, size_t* byte_size); - - private: - std::string serialized_; -}; - -TRITONSERVER_Error* -TritonServerMetrics::Serialize(const char** base, size_t* byte_size) -{ -#ifdef TRITON_ENABLE_METRICS - serialized_ = tc::Metrics::SerializedMetrics(); - *base = serialized_.c_str(); - *byte_size = serialized_.size(); - return nullptr; // Success -#else - *base = nullptr; - *byte_size = 0; - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); -#endif // TRITON_ENABLE_METRICS -} - -// -// TritonServerOptions -// -// Implementation for TRITONSERVER_ServerOptions. 
-//
-class TritonServerOptions {
- public:
-  TritonServerOptions();
-
-  const std::string& ServerId() const { return server_id_; }
-  void SetServerId(const char* id) { server_id_ = id; }
-
-  const std::set<std::string>& ModelRepositoryPaths() const
-  {
-    return repo_paths_;
-  }
-  void SetModelRepositoryPath(const char* p) { repo_paths_.insert(p); }
-
-  tc::ModelControlMode ModelControlMode() const { return model_control_mode_; }
-  void SetModelControlMode(tc::ModelControlMode m) { model_control_mode_ = m; }
-
-  const std::set<std::string>& StartupModels() const { return models_; }
-  void SetStartupModel(const char* m) { models_.insert(m); }
-
-  bool ExitOnError() const { return exit_on_error_; }
-  void SetExitOnError(bool b) { exit_on_error_ = b; }
-
-  bool StrictModelConfig() const { return strict_model_config_; }
-  void SetStrictModelConfig(bool b) { strict_model_config_ = b; }
-
-  tc::RateLimitMode RateLimiterMode() const { return rate_limit_mode_; }
-  void SetRateLimiterMode(tc::RateLimitMode m) { rate_limit_mode_ = m; }
-
-  TRITONSERVER_Error* AddRateLimiterResource(
-      const std::string& resource, const size_t count, const int device);
-
-  // The resource map is the map from device id to the map of
-  // resources with their respective counts for that device.
-  const tc::RateLimiter::ResourceMap& RateLimiterResources() const
-  {
-    return rate_limit_resource_map_;
-  }
-
-  uint64_t PinnedMemoryPoolByteSize() const { return pinned_memory_pool_size_; }
-  void SetPinnedMemoryPoolByteSize(uint64_t s) { pinned_memory_pool_size_ = s; }
-
-  uint64_t ResponseCacheByteSize() const { return response_cache_byte_size_; }
-  void SetResponseCacheByteSize(uint64_t s) { response_cache_byte_size_ = s; }
-
-  const std::map<int, uint64_t>& CudaMemoryPoolByteSize() const
-  {
-    return cuda_memory_pool_size_;
-  }
-  void SetCudaMemoryPoolByteSize(int id, uint64_t s)
-  {
-    cuda_memory_pool_size_[id] = s;
-  }
-
-  double MinSupportedComputeCapability() const
-  {
-    return min_compute_capability_;
-  }
-  void SetMinSupportedComputeCapability(double c)
-  {
-    min_compute_capability_ = c;
-  }
-
-  bool StrictReadiness() const { return strict_readiness_; }
-  void SetStrictReadiness(bool b) { strict_readiness_ = b; }
-
-  unsigned int ExitTimeout() const { return exit_timeout_; }
-  void SetExitTimeout(unsigned int t) { exit_timeout_ = t; }
-
-  unsigned int BufferManagerThreadCount() const
-  {
-    return buffer_manager_thread_count_;
-  }
-  void SetBufferManagerThreadCount(unsigned int c)
-  {
-    buffer_manager_thread_count_ = c;
-  }
-
-  unsigned int ModelLoadThreadCount() const { return model_load_thread_count_; }
-  void SetModelLoadThreadCount(unsigned int c) { model_load_thread_count_ = c; }
-
-  bool Metrics() const { return metrics_; }
-  void SetMetrics(bool b) { metrics_ = b; }
-
-  bool GpuMetrics() const { return gpu_metrics_; }
-  void SetGpuMetrics(bool b) { gpu_metrics_ = b; }
-
-  bool CpuMetrics() const { return cpu_metrics_; }
-  void SetCpuMetrics(bool b) { cpu_metrics_ = b; }
-
-  uint64_t MetricsInterval() const { return metrics_interval_; }
-  void SetMetricsInterval(uint64_t m) { metrics_interval_ = m; }
-
-  const std::string& BackendDir() const { return backend_dir_; }
-  void SetBackendDir(const std::string& bd) { backend_dir_ = bd; }
-
-  const std::string& RepoAgentDir() const { return repoagent_dir_; }
-  void SetRepoAgentDir(const std::string& rad) { repoagent_dir_ = rad; }
-
-  // The backend config map is a map from backend name to the
-  // setting=value pairs for that backend.
The empty backend name ("") - // is used to communicate configuration information that is used - // internally. - const triton::common::BackendCmdlineConfigMap& BackendCmdlineConfigMap() const - { - return backend_cmdline_config_map_; - } - TRITONSERVER_Error* AddBackendConfig( - const std::string& backend_name, const std::string& setting, - const std::string& value); - - TRITONSERVER_Error* SetHostPolicy( - const std::string& policy_name, const std::string& setting, - const std::string& value); - const triton::common::HostPolicyCmdlineConfigMap& HostPolicyCmdlineConfigMap() - const - { - return host_policy_map_; - } - - private: - std::string server_id_; - std::set repo_paths_; - tc::ModelControlMode model_control_mode_; - std::set models_; - bool exit_on_error_; - bool strict_model_config_; - bool strict_readiness_; - tc::RateLimitMode rate_limit_mode_; - tc::RateLimiter::ResourceMap rate_limit_resource_map_; - bool metrics_; - bool gpu_metrics_; - bool cpu_metrics_; - uint64_t metrics_interval_; - unsigned int exit_timeout_; - uint64_t pinned_memory_pool_size_; - uint64_t response_cache_byte_size_; - unsigned int buffer_manager_thread_count_; - unsigned int model_load_thread_count_; - std::map cuda_memory_pool_size_; - double min_compute_capability_; - std::string backend_dir_; - std::string repoagent_dir_; - triton::common::BackendCmdlineConfigMap backend_cmdline_config_map_; - triton::common::HostPolicyCmdlineConfigMap host_policy_map_; -}; - -TritonServerOptions::TritonServerOptions() - : server_id_("triton"), - model_control_mode_(tc::ModelControlMode::MODE_POLL), - exit_on_error_(true), strict_model_config_(true), strict_readiness_(true), - rate_limit_mode_(tc::RateLimitMode::RL_OFF), metrics_(true), - gpu_metrics_(true), cpu_metrics_(true), metrics_interval_(2000), - exit_timeout_(30), pinned_memory_pool_size_(1 << 28), - response_cache_byte_size_(0), buffer_manager_thread_count_(0), - model_load_thread_count_( - std::max(2u, 2 * std::thread::hardware_concurrency())), -#ifdef TRITON_ENABLE_GPU - min_compute_capability_(TRITON_MIN_COMPUTE_CAPABILITY), -#else - min_compute_capability_(0), -#endif // TRITON_ENABLE_GPU - backend_dir_("/opt/tritonserver/backends"), - repoagent_dir_("/opt/tritonserver/repoagents") -{ -#ifndef TRITON_ENABLE_METRICS - metrics_ = false; - gpu_metrics_ = false; - cpu_metrics_ = false; -#endif // TRITON_ENABLE_METRICS - -#ifndef TRITON_ENABLE_METRICS_GPU - gpu_metrics_ = false; -#endif // TRITON_ENABLE_METRICS_GPU - -#ifndef TRITON_ENABLE_METRICS_CPU - cpu_metrics_ = false; -#endif // TRITON_ENABLE_METRICS_CPU -} - -TRITONSERVER_Error* -TritonServerOptions::AddRateLimiterResource( - const std::string& name, const size_t count, const int device) -{ - auto ditr = rate_limit_resource_map_.find(device); - if (ditr == rate_limit_resource_map_.end()) { - ditr = rate_limit_resource_map_ - .emplace(device, std::map()) - .first; - } - auto ritr = ditr->second.find(name); - if (ritr == ditr->second.end()) { - ditr->second.emplace(name, count).first; - } else { - // If already present then store the minimum of the two. 
- if (ritr->second > count) { - ritr->second = count; - } - } - - return nullptr; // success -} - -TRITONSERVER_Error* -TritonServerOptions::AddBackendConfig( - const std::string& backend_name, const std::string& setting, - const std::string& value) -{ - triton::common::BackendCmdlineConfig& cc = - backend_cmdline_config_map_[backend_name]; - cc.push_back(std::make_pair(setting, value)); - - return nullptr; // success -} - -TRITONSERVER_Error* -TritonServerOptions::SetHostPolicy( - const std::string& policy_name, const std::string& setting, - const std::string& value) -{ - // Check if supported setting is passed - if ((setting != "numa-node") && (setting != "cpu-cores")) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - std::string( - "Unsupported host policy setting '" + setting + - "' is specified, supported settings are 'numa-node', 'cpu-cores'") - .c_str()); - } - - triton::common::HostPolicyCmdlineConfig& hp = host_policy_map_[policy_name]; - hp[setting] = value; - - return nullptr; // success -} - -#define SetDurationStat(DOC, PARENT, STAT_NAME, COUNT, NS) \ - do { \ - triton::common::TritonJson::Value dstat( \ - DOC, triton::common::TritonJson::ValueType::OBJECT); \ - dstat.AddUInt("count", (COUNT)); \ - dstat.AddUInt("ns", (NS)); \ - PARENT.Add(STAT_NAME, std::move(dstat)); \ - } while (false) - -} // namespace - -extern "C" { - -// -// TRITONSERVER API Version -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ApiVersion(uint32_t* major, uint32_t* minor) -{ - *major = TRITONSERVER_API_VERSION_MAJOR; - *minor = TRITONSERVER_API_VERSION_MINOR; - return nullptr; // success -} - -// -// TRITONSERVER_DataType -// -TRITONAPI_DECLSPEC const char* -TRITONSERVER_DataTypeString(TRITONSERVER_DataType datatype) -{ - switch (datatype) { - case TRITONSERVER_TYPE_BOOL: - return "BOOL"; - case TRITONSERVER_TYPE_UINT8: - return "UINT8"; - case TRITONSERVER_TYPE_UINT16: - return "UINT16"; - case TRITONSERVER_TYPE_UINT32: - return "UINT32"; - case TRITONSERVER_TYPE_UINT64: - return "UINT64"; - case TRITONSERVER_TYPE_INT8: - return "INT8"; - case TRITONSERVER_TYPE_INT16: - return "INT16"; - case TRITONSERVER_TYPE_INT32: - return "INT32"; - case TRITONSERVER_TYPE_INT64: - return "INT64"; - case TRITONSERVER_TYPE_FP16: - return "FP16"; - case TRITONSERVER_TYPE_FP32: - return "FP32"; - case TRITONSERVER_TYPE_FP64: - return "FP64"; - case TRITONSERVER_TYPE_BYTES: - return "BYTES"; - case TRITONSERVER_TYPE_BF16: - return "BF16"; - default: - break; - } - - return ""; -} - -TRITONAPI_DECLSPEC TRITONSERVER_DataType -TRITONSERVER_StringToDataType(const char* dtype) -{ - const size_t len = strlen(dtype); - return tc::DataTypeToTriton( - triton::common::ProtocolStringToDataType(dtype, len)); -} - -TRITONAPI_DECLSPEC uint32_t -TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype) -{ - switch (datatype) { - case TRITONSERVER_TYPE_BOOL: - case TRITONSERVER_TYPE_INT8: - case TRITONSERVER_TYPE_UINT8: - return 1; - case TRITONSERVER_TYPE_INT16: - case TRITONSERVER_TYPE_UINT16: - case TRITONSERVER_TYPE_FP16: - case TRITONSERVER_TYPE_BF16: - return 2; - case TRITONSERVER_TYPE_INT32: - case TRITONSERVER_TYPE_UINT32: - case TRITONSERVER_TYPE_FP32: - return 4; - case TRITONSERVER_TYPE_INT64: - case TRITONSERVER_TYPE_UINT64: - case TRITONSERVER_TYPE_FP64: - return 8; - case TRITONSERVER_TYPE_BYTES: - return 0; - default: - break; - } - - return 0; -} - -// -// TRITONSERVER_MemoryType -// -TRITONAPI_DECLSPEC const char* -TRITONSERVER_MemoryTypeString(TRITONSERVER_MemoryType memtype) -{ - 
switch (memtype) { - case TRITONSERVER_MEMORY_CPU: - return "CPU"; - case TRITONSERVER_MEMORY_CPU_PINNED: - return "CPU_PINNED"; - case TRITONSERVER_MEMORY_GPU: - return "GPU"; - default: - break; - } - - return ""; -} - -// -// TRITONSERVER_Parameter -// -TRITONAPI_DECLSPEC const char* -TRITONSERVER_ParameterTypeString(TRITONSERVER_ParameterType paramtype) -{ - switch (paramtype) { - case TRITONSERVER_PARAMETER_STRING: - return "STRING"; - case TRITONSERVER_PARAMETER_INT: - return "INT"; - case TRITONSERVER_PARAMETER_BOOL: - return "BOOL"; - default: - break; - } - - return ""; -} - -TRITONAPI_DECLSPEC TRITONSERVER_Parameter* -TRITONSERVER_ParameterNew( - const char* name, const TRITONSERVER_ParameterType type, const void* value) -{ - std::unique_ptr lparam; - switch (type) { - case TRITONSERVER_PARAMETER_STRING: - lparam.reset(new tc::InferenceParameter( - name, reinterpret_cast(value))); - break; - case TRITONSERVER_PARAMETER_INT: - lparam.reset(new tc::InferenceParameter( - name, *reinterpret_cast(value))); - break; - case TRITONSERVER_PARAMETER_BOOL: - lparam.reset(new tc::InferenceParameter( - name, *reinterpret_cast(value))); - break; - default: - break; - } - return reinterpret_cast(lparam.release()); -} - -TRITONAPI_DECLSPEC TRITONSERVER_Parameter* -TRITONSERVER_ParameterBytesNew( - const char* name, const void* byte_ptr, const uint64_t size) -{ - std::unique_ptr lparam( - new tc::InferenceParameter(name, byte_ptr, size)); - return reinterpret_cast(lparam.release()); -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_ParameterDelete(TRITONSERVER_Parameter* parameter) -{ - delete reinterpret_cast(parameter); -} - -// -// TRITONSERVER_InstanceGroupKind -// -TRITONAPI_DECLSPEC const char* -TRITONSERVER_InstanceGroupKindString(TRITONSERVER_InstanceGroupKind kind) -{ - switch (kind) { - case TRITONSERVER_INSTANCEGROUPKIND_AUTO: - return "AUTO"; - case TRITONSERVER_INSTANCEGROUPKIND_CPU: - return "CPU"; - case TRITONSERVER_INSTANCEGROUPKIND_GPU: - return "GPU"; - case TRITONSERVER_INSTANCEGROUPKIND_MODEL: - return "MODEL"; - default: - break; - } - - return ""; -} - -// -// TRITONSERVER_Log -// -TRITONAPI_DECLSPEC bool -TRITONSERVER_LogIsEnabled(TRITONSERVER_LogLevel level) -{ - switch (level) { - case TRITONSERVER_LOG_INFO: - return LOG_INFO_IS_ON; - case TRITONSERVER_LOG_WARN: - return LOG_WARNING_IS_ON; - case TRITONSERVER_LOG_ERROR: - return LOG_ERROR_IS_ON; - case TRITONSERVER_LOG_VERBOSE: - return LOG_VERBOSE_IS_ON(1); - } - - return false; -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_LogMessage( - TRITONSERVER_LogLevel level, const char* filename, const int line, - const char* msg) -{ - switch (level) { - case TRITONSERVER_LOG_INFO: - LOG_INFO_FL(filename, line) << msg; - return nullptr; - case TRITONSERVER_LOG_WARN: - LOG_WARNING_FL(filename, line) << msg; - return nullptr; - case TRITONSERVER_LOG_ERROR: - LOG_ERROR_FL(filename, line) << msg; - return nullptr; - case TRITONSERVER_LOG_VERBOSE: - LOG_VERBOSE_FL(1, filename, line) << msg; - return nullptr; - default: - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string("unknown logging level '" + std::to_string(level) + "'") - .c_str()); - } -} - -// -// TRITONSERVER_Error -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char* msg) -{ - return reinterpret_cast( - TritonServerError::Create(code, msg)); -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_ErrorDelete(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - 
delete lerror; -} - -TRITONSERVER_Error_Code -TRITONSERVER_ErrorCode(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - return lerror->Code(); -} - -TRITONAPI_DECLSPEC const char* -TRITONSERVER_ErrorCodeString(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - return tc::Status::CodeString(tc::TritonCodeToStatusCode(lerror->Code())); -} - -TRITONAPI_DECLSPEC const char* -TRITONSERVER_ErrorMessage(TRITONSERVER_Error* error) -{ - TritonServerError* lerror = reinterpret_cast(error); - return lerror->Message().c_str(); -} - -// -// TRITONSERVER_ResponseAllocator -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ResponseAllocatorNew( - TRITONSERVER_ResponseAllocator** allocator, - TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, - TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, - TRITONSERVER_ResponseAllocatorStartFn_t start_fn) -{ - *allocator = reinterpret_cast( - new tc::ResponseAllocator(alloc_fn, release_fn, start_fn)); - return nullptr; // Success -} - -TRITONSERVER_Error* -TRITONSERVER_ResponseAllocatorSetQueryFunction( - TRITONSERVER_ResponseAllocator* allocator, - TRITONSERVER_ResponseAllocatorQueryFn_t query_fn) -{ - reinterpret_cast(allocator)->SetQueryFunction( - query_fn); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction( - TRITONSERVER_ResponseAllocator* allocator, - TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn) -{ - reinterpret_cast(allocator) - ->SetBufferAttributesFunction(buffer_attributes_fn); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ResponseAllocatorDelete(TRITONSERVER_ResponseAllocator* allocator) -{ - tc::ResponseAllocator* lalloc = - reinterpret_cast(allocator); - delete lalloc; - return nullptr; // Success -} - -// -// TRITONSERVER_Message -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_MessageNewFromSerializedJson( - TRITONSERVER_Message** message, const char* base, size_t byte_size) -{ - *message = reinterpret_cast( - new tc::TritonServerMessage({base, byte_size})); - return nullptr; -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_MessageDelete(TRITONSERVER_Message* message) -{ - tc::TritonServerMessage* lmessage = - reinterpret_cast(message); - delete lmessage; - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message* message, const char** base, size_t* byte_size) -{ - tc::TritonServerMessage* lmessage = - reinterpret_cast(message); - lmessage->Serialize(base, byte_size); - return nullptr; // Success -} - -// -// TRITONSERVER_Metrics -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_MetricsDelete(TRITONSERVER_Metrics* metrics) -{ - TritonServerMetrics* lmetrics = - reinterpret_cast(metrics); - delete lmetrics; - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics* metrics, TRITONSERVER_MetricFormat format, - const char** base, size_t* byte_size) -{ - TritonServerMetrics* lmetrics = - reinterpret_cast(metrics); - - switch (format) { - case TRITONSERVER_METRIC_PROMETHEUS: { - return lmetrics->Serialize(base, byte_size); - } - - default: - break; - } - - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string("unknown metrics format '" + std::to_string(format) + "'") - .c_str()); -} - -// -// TRITONSERVER_InferenceTrace -// 
-TRITONAPI_DECLSPEC const char* -TRITONSERVER_InferenceTraceLevelString(TRITONSERVER_InferenceTraceLevel level) -{ - switch (level) { - case TRITONSERVER_TRACE_LEVEL_DISABLED: - return "DISABLED"; - case TRITONSERVER_TRACE_LEVEL_MIN: - return "MIN"; - case TRITONSERVER_TRACE_LEVEL_MAX: - return "MAX"; - case TRITONSERVER_TRACE_LEVEL_TIMESTAMPS: - return "TIMESTAMPS"; - case TRITONSERVER_TRACE_LEVEL_TENSORS: - return "TENSORS"; - } - - return ""; -} - -TRITONAPI_DECLSPEC const char* -TRITONSERVER_InferenceTraceActivityString( - TRITONSERVER_InferenceTraceActivity activity) -{ - switch (activity) { - case TRITONSERVER_TRACE_REQUEST_START: - return "REQUEST_START"; - case TRITONSERVER_TRACE_QUEUE_START: - return "QUEUE_START"; - case TRITONSERVER_TRACE_COMPUTE_START: - return "COMPUTE_START"; - case TRITONSERVER_TRACE_COMPUTE_INPUT_END: - return "COMPUTE_INPUT_END"; - case TRITONSERVER_TRACE_COMPUTE_OUTPUT_START: - return "COMPUTE_OUTPUT_START"; - case TRITONSERVER_TRACE_COMPUTE_END: - return "COMPUTE_END"; - case TRITONSERVER_TRACE_REQUEST_END: - return "REQUEST_END"; - case TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT: - return "TENSOR_QUEUE_INPUT"; - case TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT: - return "TENSOR_BACKEND_INPUT"; - case TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT: - return "TENSOR_BACKEND_OUTPUT"; - } - - return ""; -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceNew( - TRITONSERVER_InferenceTrace** trace, TRITONSERVER_InferenceTraceLevel level, - uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp) -{ -#ifdef TRITON_ENABLE_TRACING - if ((level & TRITONSERVER_TRACE_LEVEL_MIN) > 0) { - level = static_cast( - (level ^ TRITONSERVER_TRACE_LEVEL_MIN) | - TRITONSERVER_TRACE_LEVEL_TIMESTAMPS); - } - if ((level & TRITONSERVER_TRACE_LEVEL_MAX) > 0) { - level = static_cast( - (level ^ TRITONSERVER_TRACE_LEVEL_MAX) | - TRITONSERVER_TRACE_LEVEL_TIMESTAMPS); - } - tc::InferenceTrace* ltrace = new tc::InferenceTrace( - level, parent_id, activity_fn, nullptr, release_fn, trace_userp); - *trace = reinterpret_cast(ltrace); - return nullptr; // Success -#else - *trace = nullptr; - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceTensorNew( - TRITONSERVER_InferenceTrace** trace, TRITONSERVER_InferenceTraceLevel level, - uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void* trace_userp) -{ -#ifdef TRITON_ENABLE_TRACING - if ((level & TRITONSERVER_TRACE_LEVEL_MIN) > 0) { - level = static_cast( - (level ^ TRITONSERVER_TRACE_LEVEL_MIN) | - TRITONSERVER_TRACE_LEVEL_TIMESTAMPS); - } - if ((level & TRITONSERVER_TRACE_LEVEL_MAX) > 0) { - level = static_cast( - (level ^ TRITONSERVER_TRACE_LEVEL_MAX) | - TRITONSERVER_TRACE_LEVEL_TIMESTAMPS); - } - tc::InferenceTrace* ltrace = new tc::InferenceTrace( - level, parent_id, activity_fn, tensor_activity_fn, release_fn, - trace_userp); - *trace = reinterpret_cast(ltrace); - return nullptr; // Success -#else - *trace = nullptr; - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* 
-TRITONSERVER_InferenceTraceDelete(TRITONSERVER_InferenceTrace* trace) -{ -#ifdef TRITON_ENABLE_TRACING - tc::InferenceTrace* ltrace = reinterpret_cast(trace); - delete ltrace; - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceId(TRITONSERVER_InferenceTrace* trace, uint64_t* id) -{ -#ifdef TRITON_ENABLE_TRACING - tc::InferenceTrace* ltrace = reinterpret_cast(trace); - *id = ltrace->Id(); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceParentId( - TRITONSERVER_InferenceTrace* trace, uint64_t* parent_id) -{ -#ifdef TRITON_ENABLE_TRACING - tc::InferenceTrace* ltrace = reinterpret_cast(trace); - *parent_id = ltrace->ParentId(); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceModelName( - TRITONSERVER_InferenceTrace* trace, const char** model_name) -{ -#ifdef TRITON_ENABLE_TRACING - tc::InferenceTrace* ltrace = reinterpret_cast(trace); - *model_name = ltrace->ModelName().c_str(); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceTraceModelVersion( - TRITONSERVER_InferenceTrace* trace, int64_t* model_version) -{ -#ifdef TRITON_ENABLE_TRACING - tc::InferenceTrace* ltrace = reinterpret_cast(trace); - *model_version = ltrace->ModelVersion(); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported"); -#endif // TRITON_ENABLE_TRACING -} - -// -// TRITONSERVER_ServerOptions -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsNew(TRITONSERVER_ServerOptions** options) -{ - *options = - reinterpret_cast(new TritonServerOptions()); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsDelete(TRITONSERVER_ServerOptions* options) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - delete loptions; - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetServerId( - TRITONSERVER_ServerOptions* options, const char* server_id) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetServerId(server_id); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetModelRepositoryPath( - TRITONSERVER_ServerOptions* options, const char* model_repository_path) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetModelRepositoryPath(model_repository_path); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetModelControlMode( - TRITONSERVER_ServerOptions* options, TRITONSERVER_ModelControlMode mode) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - - // convert mode from TRITONSERVER_ to triton::core - switch (mode) { - case TRITONSERVER_MODEL_CONTROL_NONE: { - 
loptions->SetModelControlMode(tc::ModelControlMode::MODE_NONE); - break; - } - case TRITONSERVER_MODEL_CONTROL_POLL: { - loptions->SetModelControlMode(tc::ModelControlMode::MODE_POLL); - break; - } - case TRITONSERVER_MODEL_CONTROL_EXPLICIT: { - loptions->SetModelControlMode(tc::ModelControlMode::MODE_EXPLICIT); - break; - } - default: { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string("unknown control mode '" + std::to_string(mode) + "'") - .c_str()); - } - } - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetStartupModel( - TRITONSERVER_ServerOptions* options, const char* model_name) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetStartupModel(model_name); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetExitOnError( - TRITONSERVER_ServerOptions* options, bool exit) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetExitOnError(exit); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetStrictModelConfig( - TRITONSERVER_ServerOptions* options, bool strict) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetStrictModelConfig(strict); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetRateLimiterMode( - TRITONSERVER_ServerOptions* options, TRITONSERVER_RateLimitMode mode) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - - // convert mode from TRITONSERVER_ to triton::core - switch (mode) { - case TRITONSERVER_RATE_LIMIT_EXEC_COUNT: { - loptions->SetRateLimiterMode(tc::RateLimitMode::RL_EXEC_COUNT); - break; - } - case TRITONSERVER_RATE_LIMIT_OFF: { - loptions->SetRateLimiterMode(tc::RateLimitMode::RL_OFF); - break; - } - default: { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string("unknown rate limit mode '" + std::to_string(mode) + "'") - .c_str()); - } - } - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsAddRateLimiterResource( - TRITONSERVER_ServerOptions* options, const char* name, const size_t count, - const int device) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - return loptions->AddRateLimiterResource(name, count, device); -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize( - TRITONSERVER_ServerOptions* options, uint64_t size) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetPinnedMemoryPoolByteSize(size); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize( - TRITONSERVER_ServerOptions* options, int gpu_device, uint64_t size) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetCudaMemoryPoolByteSize(gpu_device, size); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetResponseCacheByteSize( - TRITONSERVER_ServerOptions* options, uint64_t size) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetResponseCacheByteSize(size); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( - TRITONSERVER_ServerOptions* options, double cc) -{ - TritonServerOptions* loptions = - 
reinterpret_cast(options); - loptions->SetMinSupportedComputeCapability(cc); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetStrictReadiness( - TRITONSERVER_ServerOptions* options, bool strict) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetStrictReadiness(strict); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetExitTimeout( - TRITONSERVER_ServerOptions* options, unsigned int timeout) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetExitTimeout(timeout); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( - TRITONSERVER_ServerOptions* options, unsigned int thread_count) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetBufferManagerThreadCount(thread_count); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetModelLoadThreadCount( - TRITONSERVER_ServerOptions* options, unsigned int thread_count) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetModelLoadThreadCount(thread_count); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogFile( - TRITONSERVER_ServerOptions* options, const char* file) -{ -#ifdef TRITON_ENABLE_LOGGING - std::string out_file; - if (file != nullptr) { - out_file = std::string(file); - } - const std::string& error = LOG_SET_OUT_FILE(out_file); - if (!error.empty()) { - return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, (error).c_str()); - } - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); -#endif // TRITON_ENABLE_LOGGING -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogInfo( - TRITONSERVER_ServerOptions* options, bool log) -{ -#ifdef TRITON_ENABLE_LOGGING - // Logging is global for now... - LOG_ENABLE_INFO(log); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); -#endif // TRITON_ENABLE_LOGGING -} - -// Enable or disable warning level logging. -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogWarn( - TRITONSERVER_ServerOptions* options, bool log) -{ -#ifdef TRITON_ENABLE_LOGGING - // Logging is global for now... - LOG_ENABLE_WARNING(log); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); -#endif // TRITON_ENABLE_LOGGING -} - -// Enable or disable error level logging. -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogError( - TRITONSERVER_ServerOptions* options, bool log) -{ -#ifdef TRITON_ENABLE_LOGGING - // Logging is global for now... - LOG_ENABLE_ERROR(log); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); -#endif // TRITON_ENABLE_LOGGING -} - -// Set verbose logging level. Level zero disables verbose logging. -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogVerbose( - TRITONSERVER_ServerOptions* options, int level) -{ -#ifdef TRITON_ENABLE_LOGGING - // Logging is global for now... 
- LOG_SET_VERBOSE(level); -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); -#endif // TRITON_ENABLE_LOGGING - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetLogFormat( - TRITONSERVER_ServerOptions* options, const TRITONSERVER_LogFormat format) -{ -#ifdef TRITON_ENABLE_LOGGING - // Logging is global for now... - switch (format) { - case TRITONSERVER_LOG_DEFAULT: - LOG_SET_FORMAT(triton::common::Logger::Format::kDEFAULT); - break; - case TRITONSERVER_LOG_ISO8601: - LOG_SET_FORMAT(triton::common::Logger::Format::kISO8601); - break; - } -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "logging not supported"); -#endif // TRITON_ENABLE_LOGGING - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetMetrics( - TRITONSERVER_ServerOptions* options, bool metrics) -{ -#ifdef TRITON_ENABLE_METRICS - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetMetrics(metrics); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); -#endif // TRITON_ENABLE_METRICS -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetGpuMetrics( - TRITONSERVER_ServerOptions* options, bool gpu_metrics) -{ -#ifdef TRITON_ENABLE_METRICS - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetGpuMetrics(gpu_metrics); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); -#endif // TRITON_ENABLE_METRICS -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetCpuMetrics( - TRITONSERVER_ServerOptions* options, bool cpu_metrics) -{ -#ifdef TRITON_ENABLE_METRICS - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetCpuMetrics(cpu_metrics); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); -#endif // TRITON_ENABLE_METRICS -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetMetricsInterval( - TRITONSERVER_ServerOptions* options, uint64_t metrics_interval_ms) -{ -#ifdef TRITON_ENABLE_METRICS - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetMetricsInterval(metrics_interval_ms); - return nullptr; // Success -#else - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported"); -#endif // TRITON_ENABLE_METRICS -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetBackendDirectory( - TRITONSERVER_ServerOptions* options, const char* backend_dir) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetBackendDir(backend_dir); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - TRITONSERVER_ServerOptions* options, const char* repoagent_dir) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - loptions->SetRepoAgentDir(repoagent_dir); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit( - TRITONSERVER_ServerOptions* options, - const TRITONSERVER_InstanceGroupKind kind, const int device_id, - const double fraction) -{ - if (device_id < 0) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("expects device ID >= 0, got ") + - 
std::to_string(device_id)) - .c_str()); - } else if ((fraction < 0.0) || (fraction > 1.0)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("expects limit fraction to be in range [0.0, 1.0], got ") + - std::to_string(fraction)) - .c_str()); - } - - TritonServerOptions* loptions = - reinterpret_cast(options); - switch (kind) { - case TRITONSERVER_INSTANCEGROUPKIND_GPU: { - static std::string key_prefix = "model-load-gpu-limit-device-"; - return loptions->AddBackendConfig( - "", key_prefix + std::to_string(device_id), std::to_string(fraction)); - } - default: - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("given device kind is not supported, got: ") + - TRITONSERVER_InstanceGroupKindString(kind)) - .c_str()); - } -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetBackendConfig( - TRITONSERVER_ServerOptions* options, const char* backend_name, - const char* setting, const char* value) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - return loptions->AddBackendConfig(backend_name, setting, value); -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerOptionsSetHostPolicy( - TRITONSERVER_ServerOptions* options, const char* policy_name, - const char* setting, const char* value) -{ - TritonServerOptions* loptions = - reinterpret_cast(options); - return loptions->SetHostPolicy(policy_name, setting, value); -} - -// -// TRITONSERVER_InferenceRequest -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestNew( - TRITONSERVER_InferenceRequest** inference_request, - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - std::shared_ptr model; - RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model)); - - *inference_request = reinterpret_cast( - new tc::InferenceRequest(model, model_version)); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestDelete( - TRITONSERVER_InferenceRequest* inference_request) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - delete lrequest; - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestId( - TRITONSERVER_InferenceRequest* inference_request, const char** id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - *id = lrequest->Id().c_str(); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetId( - TRITONSERVER_InferenceRequest* inference_request, const char* id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - lrequest->SetId(id); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestFlags( - TRITONSERVER_InferenceRequest* inference_request, uint32_t* flags) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - *flags = lrequest->Flags(); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetFlags( - TRITONSERVER_InferenceRequest* inference_request, uint32_t flags) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - lrequest->SetFlags(flags); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestCorrelationId( - TRITONSERVER_InferenceRequest* inference_request, uint64_t* 
correlation_id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - const tc::InferenceRequest::SequenceId& corr_id = lrequest->CorrelationId(); - if (corr_id.Type() != tc::InferenceRequest::SequenceId::DataType::UINT64) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string("given request's correlation id is not an unsigned int") - .c_str()); - } - *correlation_id = corr_id.UnsignedIntValue(); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestCorrelationIdString( - TRITONSERVER_InferenceRequest* inference_request, - const char** correlation_id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - const tc::InferenceRequest::SequenceId& corr_id = lrequest->CorrelationId(); - if (corr_id.Type() != tc::InferenceRequest::SequenceId::DataType::STRING) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string("given request's correlation id is not a string").c_str()); - } - *correlation_id = corr_id.StringValue().c_str(); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetCorrelationId( - TRITONSERVER_InferenceRequest* inference_request, uint64_t correlation_id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - lrequest->SetCorrelationId(tc::InferenceRequest::SequenceId(correlation_id)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetCorrelationIdString( - TRITONSERVER_InferenceRequest* inference_request, - const char* correlation_id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - if (std::string(correlation_id).length() > 128) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - std::string( - "string correlation ID cannot be longer than 128 characters") - .c_str()); - } - lrequest->SetCorrelationId(tc::InferenceRequest::SequenceId(correlation_id)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestPriority( - TRITONSERVER_InferenceRequest* inference_request, uint32_t* priority) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - *priority = lrequest->Priority(); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetPriority( - TRITONSERVER_InferenceRequest* inference_request, uint32_t priority) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - lrequest->SetPriority(priority); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestTimeoutMicroseconds( - TRITONSERVER_InferenceRequest* inference_request, uint64_t* timeout_us) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - *timeout_us = lrequest->TimeoutMicroseconds(); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( - TRITONSERVER_InferenceRequest* inference_request, uint64_t timeout_us) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - lrequest->SetTimeoutMicroseconds(timeout_us); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const TRITONSERVER_DataType datatype, const int64_t* shape, - uint64_t dim_count) -{ - 
tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->AddOriginalInput( - name, tc::TritonToDataType(datatype), shape, dim_count)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAddRawInput( - TRITONSERVER_InferenceRequest* inference_request, const char* name) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->AddRawInput(name)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveInput( - TRITONSERVER_InferenceRequest* inference_request, const char* name) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->RemoveOriginalInput(name)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveAllInputs( - TRITONSERVER_InferenceRequest* inference_request) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->RemoveAllOriginalInputs()); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAppendInputData( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - - tc::InferenceRequest::Input* input; - RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); - RETURN_IF_STATUS_ERROR( - input->AppendData(base, byte_size, memory_type, memory_type_id)); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id, const char* host_policy_name) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - - tc::InferenceRequest::Input* input; - RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); - RETURN_IF_STATUS_ERROR(input->AppendDataWithHostPolicy( - base, byte_size, memory_type, memory_type_id, host_policy_name)); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes( - TRITONSERVER_InferenceRequest* inference_request, const char* name, - const void* base, TRITONSERVER_BufferAttributes* buffer_attributes) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - - tc::InferenceRequest::Input* input; - RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); - RETURN_IF_STATUS_ERROR( - input->AppendDataWithBufferAttributes(base, lbuffer_attributes)); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveAllInputData( - TRITONSERVER_InferenceRequest* inference_request, const char* name) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - - tc::InferenceRequest::Input* input; - RETURN_IF_STATUS_ERROR(lrequest->MutableOriginalInput(name, &input)); - RETURN_IF_STATUS_ERROR(input->RemoveAllData()); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* 
-TRITONSERVER_InferenceRequestAddRequestedOutput( - TRITONSERVER_InferenceRequest* inference_request, const char* name) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->AddOriginalRequestedOutput(name)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveRequestedOutput( - TRITONSERVER_InferenceRequest* inference_request, const char* name) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->RemoveOriginalRequestedOutput(name)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( - TRITONSERVER_InferenceRequest* inference_request) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR(lrequest->RemoveAllOriginalRequestedOutputs()); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetReleaseCallback( - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, - void* request_release_userp) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - RETURN_IF_STATUS_ERROR( - lrequest->SetReleaseCallback(request_release_fn, request_release_userp)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceRequestSetResponseCallback( - TRITONSERVER_InferenceRequest* inference_request, - TRITONSERVER_ResponseAllocator* response_allocator, - void* response_allocator_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - void* response_userp) -{ - tc::InferenceRequest* lrequest = - reinterpret_cast(inference_request); - tc::ResponseAllocator* lallocator = - reinterpret_cast(response_allocator); - RETURN_IF_STATUS_ERROR(lrequest->SetResponseCallback( - lallocator, response_allocator_userp, response_fn, response_userp)); - return nullptr; // Success -} - -// -// TRITONSERVER_InferenceResponse -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseDelete( - TRITONSERVER_InferenceResponse* inference_response) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - delete lresponse; - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseError( - TRITONSERVER_InferenceResponse* inference_response) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - RETURN_IF_STATUS_ERROR(lresponse->ResponseStatus()); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseModel( - TRITONSERVER_InferenceResponse* inference_response, const char** model_name, - int64_t* model_version) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - *model_name = lresponse->ModelName().c_str(); - *model_version = lresponse->ActualModelVersion(); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseId( - TRITONSERVER_InferenceResponse* inference_response, const char** request_id) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - *request_id = lresponse->Id().c_str(); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseParameterCount( - TRITONSERVER_InferenceResponse* inference_response, uint32_t* count) -{ - 
tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - const auto& parameters = lresponse->Parameters(); - *count = parameters.size(); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseParameter( - TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, - const char** name, TRITONSERVER_ParameterType* type, const void** vvalue) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - const auto& parameters = lresponse->Parameters(); - if (index >= parameters.size()) { - return TritonServerError::Create( - TRITONSERVER_ERROR_INVALID_ARG, - "out of bounds index " + std::to_string(index) + - std::string(": response has ") + std::to_string(parameters.size()) + - " parameters"); - } - - const tc::InferenceParameter& param = parameters[index]; - - *name = param.Name().c_str(); - *type = param.Type(); - *vvalue = param.ValuePointer(); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseOutputCount( - TRITONSERVER_InferenceResponse* inference_response, uint32_t* count) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - const auto& outputs = lresponse->Outputs(); - *count = outputs.size(); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseOutput( - TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, - const char** name, TRITONSERVER_DataType* datatype, const int64_t** shape, - uint64_t* dim_count, const void** base, size_t* byte_size, - TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id, void** userp) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - const auto& outputs = lresponse->Outputs(); - if (index >= outputs.size()) { - return TritonServerError::Create( - TRITONSERVER_ERROR_INVALID_ARG, - "out of bounds index " + std::to_string(index) + - std::string(": response has ") + std::to_string(outputs.size()) + - " outputs"); - } - - const tc::InferenceResponse::Output& output = outputs[index]; - - *name = output.Name().c_str(); - *datatype = tc::DataTypeToTriton(output.DType()); - - const std::vector& oshape = output.Shape(); - *shape = &oshape[0]; - *dim_count = oshape.size(); - - RETURN_IF_STATUS_ERROR( - output.DataBuffer(base, byte_size, memory_type, memory_type_id, userp)); - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_InferenceResponseOutputClassificationLabel( - TRITONSERVER_InferenceResponse* inference_response, const uint32_t index, - const size_t class_index, const char** label) -{ - tc::InferenceResponse* lresponse = - reinterpret_cast(inference_response); - - const auto& outputs = lresponse->Outputs(); - if (index >= outputs.size()) { - return TritonServerError::Create( - TRITONSERVER_ERROR_INVALID_ARG, - "out of bounds index " + std::to_string(index) + - std::string(": response has ") + std::to_string(outputs.size()) + - " outputs"); - } - - const tc::InferenceResponse::Output& output = outputs[index]; - RETURN_IF_STATUS_ERROR( - lresponse->ClassificationLabel(output, class_index, label)); - - return nullptr; // Success -} - -// -// TRITONSERVER_BufferAttributes -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesNew( - TRITONSERVER_BufferAttributes** buffer_attributes) -{ - tc::BufferAttributes* lbuffer_attributes = new tc::BufferAttributes(); - *buffer_attributes = - 
reinterpret_cast(lbuffer_attributes); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesDelete( - TRITONSERVER_BufferAttributes* buffer_attributes) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - delete lbuffer_attributes; - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetMemoryTypeId( - TRITONSERVER_BufferAttributes* buffer_attributes, int64_t memory_type_id) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - lbuffer_attributes->SetMemoryTypeId(memory_type_id); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetMemoryType( - TRITONSERVER_BufferAttributes* buffer_attributes, - TRITONSERVER_MemoryType memory_type) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - lbuffer_attributes->SetMemoryType(memory_type); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetCudaIpcHandle( - TRITONSERVER_BufferAttributes* buffer_attributes, void* cuda_ipc_handle) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - lbuffer_attributes->SetCudaIpcHandle(cuda_ipc_handle); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesSetByteSize( - TRITONSERVER_BufferAttributes* buffer_attributes, size_t byte_size) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - lbuffer_attributes->SetByteSize(byte_size); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesMemoryTypeId( - TRITONSERVER_BufferAttributes* buffer_attributes, int64_t* memory_type_id) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - *memory_type_id = lbuffer_attributes->MemoryTypeId(); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesMemoryType( - TRITONSERVER_BufferAttributes* buffer_attributes, - TRITONSERVER_MemoryType* memory_type) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - *memory_type = lbuffer_attributes->MemoryType(); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesCudaIpcHandle( - TRITONSERVER_BufferAttributes* buffer_attributes, void** cuda_ipc_handle) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - *cuda_ipc_handle = lbuffer_attributes->CudaIpcHandle(); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_BufferAttributesByteSize( - TRITONSERVER_BufferAttributes* buffer_attributes, size_t* byte_size) -{ - tc::BufferAttributes* lbuffer_attributes = - reinterpret_cast(buffer_attributes); - *byte_size = lbuffer_attributes->ByteSize(); - - return nullptr; // success -} - -// -// TRITONSERVER_Server -// -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerNew( - TRITONSERVER_Server** server, TRITONSERVER_ServerOptions* options) -{ - tc::InferenceServer* lserver = new tc::InferenceServer(); - TritonServerOptions* loptions = - reinterpret_cast(options); - - NVTX_INITIALIZE; - -#ifdef TRITON_ENABLE_METRICS - // NOTE: Metrics must be enabled before backends are setup - if (loptions->Metrics()) { - tc::Metrics::EnableMetrics(); - 
tc::Metrics::SetMetricsInterval(loptions->MetricsInterval()); - } -#endif // TRITON_ENABLE_METRICS - - lserver->SetId(loptions->ServerId()); - lserver->SetModelRepositoryPaths(loptions->ModelRepositoryPaths()); - lserver->SetModelControlMode(loptions->ModelControlMode()); - lserver->SetStartupModels(loptions->StartupModels()); - bool strict_model_config = loptions->StrictModelConfig(); - lserver->SetStrictModelConfigEnabled(strict_model_config); - lserver->SetRateLimiterMode(loptions->RateLimiterMode()); - lserver->SetRateLimiterResources(loptions->RateLimiterResources()); - lserver->SetPinnedMemoryPoolByteSize(loptions->PinnedMemoryPoolByteSize()); - lserver->SetResponseCacheByteSize(loptions->ResponseCacheByteSize()); - lserver->SetCudaMemoryPoolByteSize(loptions->CudaMemoryPoolByteSize()); - double min_compute_capability = loptions->MinSupportedComputeCapability(); - lserver->SetMinSupportedComputeCapability(min_compute_capability); - lserver->SetStrictReadinessEnabled(loptions->StrictReadiness()); - lserver->SetExitTimeoutSeconds(loptions->ExitTimeout()); - lserver->SetHostPolicyCmdlineConfig(loptions->HostPolicyCmdlineConfigMap()); - lserver->SetRepoAgentDir(loptions->RepoAgentDir()); - lserver->SetBufferManagerThreadCount(loptions->BufferManagerThreadCount()); - lserver->SetModelLoadThreadCount(loptions->ModelLoadThreadCount()); - - // SetBackendCmdlineConfig must be called after all AddBackendConfig calls - // have completed. - // Note that the auto complete config condition is reverted - // due to setting name being different - loptions->AddBackendConfig( - std::string(), "auto-complete-config", - strict_model_config ? "false" : "true"); - loptions->AddBackendConfig( - std::string(), "min-compute-capability", - std::to_string(min_compute_capability)); - loptions->AddBackendConfig( - std::string(), "backend-directory", loptions->BackendDir()); - lserver->SetBackendCmdlineConfig(loptions->BackendCmdlineConfigMap()); - - // Initialize server - tc::Status status = lserver->Init(); - -#ifdef TRITON_ENABLE_METRICS - if (loptions->Metrics() && lserver->ResponseCacheEnabled()) { - // NOTE: Cache metrics must be enabled after cache initialized in - // server->Init() - tc::Metrics::EnableCacheMetrics(lserver->GetResponseCache()); - } -#ifdef TRITON_ENABLE_METRICS_GPU - if (loptions->Metrics() && loptions->GpuMetrics()) { - tc::Metrics::EnableGPUMetrics(); - } -#endif // TRITON_ENABLE_METRICS_GPU - -#ifdef TRITON_ENABLE_METRICS_CPU - if (loptions->Metrics() && loptions->CpuMetrics()) { - tc::Metrics::EnableCpuMetrics(); - } -#endif // TRITON_ENABLE_METRICS_CPU - - const bool poll_metrics = - (lserver->ResponseCacheEnabled() || loptions->GpuMetrics() || - loptions->CpuMetrics()); - if (loptions->Metrics() && poll_metrics) { - // Start thread to poll enabled metrics periodically - tc::Metrics::StartPollingThreadSingleton(lserver->GetResponseCache()); - } -#endif // TRITON_ENABLE_METRICS - - - // Setup tritonserver options table - std::vector options_headers; - options_headers.emplace_back("Option"); - options_headers.emplace_back("Value"); - - triton::common::TablePrinter options_table(options_headers); - options_table.InsertRow(std::vector{"server_id", lserver->Id()}); - options_table.InsertRow( - std::vector{"server_version", lserver->Version()}); - - auto extensions = lserver->Extensions(); - std::string exts; - for (const auto& ext : extensions) { - exts.append(ext); - exts.append(" "); - } - - // Remove the trailing space - if (exts.size() > 0) - exts.pop_back(); - - 
options_table.InsertRow(std::vector{"server_extensions", exts}); - - size_t i = 0; - for (const auto& model_repository_path : lserver->ModelRepositoryPaths()) { - options_table.InsertRow(std::vector{ - "model_repository_path[" + std::to_string(i) + "]", - model_repository_path}); - ++i; - } - - std::string model_control_mode; - auto control_mode = lserver->GetModelControlMode(); - switch (control_mode) { - case tc::ModelControlMode::MODE_NONE: { - model_control_mode = "MODE_NONE"; - break; - } - case tc::ModelControlMode::MODE_POLL: { - model_control_mode = "MODE_POLL"; - break; - } - case tc::ModelControlMode::MODE_EXPLICIT: { - model_control_mode = "MODE_EXPLICIT"; - break; - } - default: { - model_control_mode = ""; - } - } - options_table.InsertRow( - std::vector{"model_control_mode", model_control_mode}); - - i = 0; - for (const auto& startup_model : lserver->StartupModels()) { - options_table.InsertRow(std::vector{ - "startup_models_" + std::to_string(i), startup_model}); - ++i; - } - options_table.InsertRow(std::vector{ - "strict_model_config", - std::to_string(lserver->StrictModelConfigEnabled())}); - std::string rate_limit = RateLimitModeToString(lserver->RateLimiterMode()); - options_table.InsertRow(std::vector{"rate_limit", rate_limit}); - i = 0; - for (const auto& device_resources : lserver->RateLimiterResources()) { - for (const auto& resource : device_resources.second) { - options_table.InsertRow(std::vector{ - "rate_limit_resource[" + std::to_string(i) + "]", - ResourceString( - resource.first, resource.second, device_resources.first)}); - ++i; - } - } - options_table.InsertRow(std::vector{ - "pinned_memory_pool_byte_size", - std::to_string(lserver->PinnedMemoryPoolByteSize())}); - for (const auto& cuda_memory_pool : lserver->CudaMemoryPoolByteSize()) { - options_table.InsertRow(std::vector{ - "cuda_memory_pool_byte_size{" + std::to_string(cuda_memory_pool.first) + - "}", - std::to_string(cuda_memory_pool.second)}); - } - options_table.InsertRow(std::vector{ - "response_cache_byte_size", - std::to_string(lserver->ResponseCacheByteSize())}); - - std::stringstream compute_capability_ss; - compute_capability_ss.setf(std::ios::fixed); - compute_capability_ss.precision(1); - compute_capability_ss << lserver->MinSupportedComputeCapability(); - options_table.InsertRow(std::vector{ - "min_supported_compute_capability", compute_capability_ss.str()}); - options_table.InsertRow(std::vector{ - "strict_readiness", std::to_string(lserver->StrictReadinessEnabled())}); - options_table.InsertRow(std::vector{ - "exit_timeout", std::to_string(lserver->ExitTimeoutSeconds())}); - - std::string options_table_string = options_table.PrintTable(); - LOG_INFO << options_table_string; - - if (!status.IsOk()) { - if (loptions->ExitOnError()) { - lserver->Stop(true /* force */); - delete lserver; - RETURN_IF_STATUS_ERROR(status); - } - - LOG_ERROR << status.AsString(); - } - - *server = reinterpret_cast(lserver); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerDelete(TRITONSERVER_Server* server) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - if (lserver != nullptr) { - RETURN_IF_STATUS_ERROR(lserver->Stop()); - } - delete lserver; - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerStop(TRITONSERVER_Server* server) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - if (lserver != nullptr) { - RETURN_IF_STATUS_ERROR(lserver->Stop()); - } - return nullptr; // Success -} - 
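// ---------------------------------------------------------------------------
// Illustrative aside, not part of the deleted file: the functions above
// implement creation, liveness probing, stop and delete for
// TRITONSERVER_Server. A minimal, hedged sketch of how an embedding
// application typically drives that lifecycle through the public C API is
// shown below; the repository path "/models" is a placeholder and error
// handling is reduced to a tiny helper.
// ---------------------------------------------------------------------------
#include <cstdio>
#include <cstdlib>

#include "triton/core/tritonserver.h"

// Print the message and abort if a Triton C API call returned an error object.
static void
CheckTritonError(TRITONSERVER_Error* err, const char* what)
{
  if (err != nullptr) {
    std::fprintf(stderr, "%s: %s\n", what, TRITONSERVER_ErrorMessage(err));
    TRITONSERVER_ErrorDelete(err);
    std::exit(1);
  }
}

int
main()
{
  // Build the server options; only the model repository path is set here.
  TRITONSERVER_ServerOptions* options = nullptr;
  CheckTritonError(TRITONSERVER_ServerOptionsNew(&options), "options new");
  CheckTritonError(
      TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models"),
      "set model repository path");

  // Create the server (TRITONSERVER_ServerNew) and release the options.
  TRITONSERVER_Server* server = nullptr;
  CheckTritonError(TRITONSERVER_ServerNew(&server, options), "server new");
  CheckTritonError(TRITONSERVER_ServerOptionsDelete(options), "options delete");

  // Probe liveness, then stop and delete the server as implemented above.
  bool live = false;
  CheckTritonError(TRITONSERVER_ServerIsLive(server, &live), "is live");
  std::printf("server live: %s\n", live ? "true" : "false");

  CheckTritonError(TRITONSERVER_ServerStop(server), "server stop");
  CheckTritonError(TRITONSERVER_ServerDelete(server), "server delete");
  return 0;
}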
-TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerRegisterModelRepository( - TRITONSERVER_Server* server, const char* repository_path, - const TRITONSERVER_Parameter** name_mapping, const uint32_t mapping_count) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - if ((name_mapping == nullptr) && (mapping_count != 0)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "model mappings are not provided while mapping count is non-zero"); - } - - std::unordered_map model_mapping; - for (size_t i = 0; i < mapping_count; ++i) { - auto mapping = - reinterpret_cast(name_mapping[i]); - auto subdir = mapping->Name(); - - if (mapping->Type() != TRITONSERVER_PARAMETER_STRING) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "Mapped model name must be a string, found " - "another type for " + - subdir) - .c_str()); - } - - auto model_name = - std::string(reinterpret_cast(mapping->ValuePointer())); - - if (!(model_mapping.emplace(model_name, subdir).second)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - (std::string("failed to register '") + repository_path + - "', there is a conflicting mapping for '" + std::string(model_name) + - "'") - .c_str()); - } - } - RETURN_IF_STATUS_ERROR( - lserver->RegisterModelRepository(repository_path, model_mapping)); - return nullptr; // Success -} - -TRITONSERVER_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerUnregisterModelRepository( - TRITONSERVER_Server* server, const char* repository_path) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - RETURN_IF_STATUS_ERROR(lserver->UnregisterModelRepository(repository_path)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server* server) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - RETURN_IF_STATUS_ERROR(lserver->PollModelRepository()); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerIsLive(TRITONSERVER_Server* server, bool* live) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - RETURN_IF_STATUS_ERROR(lserver->IsLive(live)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerIsReady(TRITONSERVER_Server* server, bool* ready) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - RETURN_IF_STATUS_ERROR(lserver->IsReady(ready)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelIsReady( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, bool* ready) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - RETURN_IF_STATUS_ERROR( - lserver->ModelIsReady(model_name, model_version, ready)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, uint32_t* flags, void** voidp) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - if (voidp != nullptr) { - *voidp = nullptr; - } - - std::shared_ptr model; - RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model)); - - if (model->Config().max_batch_size() > 0) { - *flags = TRITONSERVER_BATCH_FIRST_DIM; - } else { - *flags = TRITONSERVER_BATCH_UNKNOWN; - } - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelTransactionProperties( - 
TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, uint32_t* txn_flags, void** voidp) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - if (voidp != nullptr) { - *voidp = nullptr; - } - - *txn_flags = 0; - - std::shared_ptr model; - RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model)); - - if (model->Config().model_transaction_policy().decoupled()) { - *txn_flags = TRITONSERVER_TXN_DECOUPLED; - } else { - *txn_flags = TRITONSERVER_TXN_ONE_TO_ONE; - } - - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerMetadata( - TRITONSERVER_Server* server, TRITONSERVER_Message** server_metadata) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - triton::common::TritonJson::Value metadata( - triton::common::TritonJson::ValueType::OBJECT); - - // Just store string reference in JSON object since it will be - // serialized to another buffer before lserver->Id() or - // lserver->Version() lifetime ends. - RETURN_IF_STATUS_ERROR(metadata.AddStringRef("name", lserver->Id().c_str())); - RETURN_IF_STATUS_ERROR( - metadata.AddStringRef("version", lserver->Version().c_str())); - - triton::common::TritonJson::Value extensions( - metadata, triton::common::TritonJson::ValueType::ARRAY); - const std::vector& exts = lserver->Extensions(); - for (const auto ext : exts) { - RETURN_IF_STATUS_ERROR(extensions.AppendStringRef(ext)); - } - - RETURN_IF_STATUS_ERROR(metadata.Add("extensions", std::move(extensions))); - - *server_metadata = reinterpret_cast( - new tc::TritonServerMessage(metadata)); - return nullptr; // Success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelMetadata( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, TRITONSERVER_Message** model_metadata) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - std::shared_ptr model; - RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model)); - - std::vector ready_versions; - RETURN_IF_STATUS_ERROR( - lserver->ModelReadyVersions(model_name, &ready_versions)); - - triton::common::TritonJson::Value metadata( - triton::common::TritonJson::ValueType::OBJECT); - - // Can use string ref in this function even though model can be - // unloaded and config becomes invalid, because TritonServeMessage - // serializes the json when it is constructed below. 
- RETURN_IF_STATUS_ERROR(metadata.AddStringRef("name", model_name)); - - triton::common::TritonJson::Value versions( - metadata, triton::common::TritonJson::ValueType::ARRAY); - if (model_version != -1) { - RETURN_IF_STATUS_ERROR( - versions.AppendString(std::move(std::to_string(model_version)))); - } else { - for (const auto v : ready_versions) { - RETURN_IF_STATUS_ERROR( - versions.AppendString(std::move(std::to_string(v)))); - } - } - - RETURN_IF_STATUS_ERROR(metadata.Add("versions", std::move(versions))); - - const auto& model_config = model->Config(); - if (!model_config.platform().empty()) { - RETURN_IF_STATUS_ERROR( - metadata.AddStringRef("platform", model_config.platform().c_str())); - } else { - RETURN_IF_STATUS_ERROR( - metadata.AddStringRef("platform", model_config.backend().c_str())); - } - - triton::common::TritonJson::Value inputs( - metadata, triton::common::TritonJson::ValueType::ARRAY); - for (const auto& io : model_config.input()) { - triton::common::TritonJson::Value io_metadata( - metadata, triton::common::TritonJson::ValueType::OBJECT); - RETURN_IF_STATUS_ERROR(io_metadata.AddStringRef("name", io.name().c_str())); - RETURN_IF_STATUS_ERROR(io_metadata.AddStringRef( - "datatype", triton::common::DataTypeToProtocolString(io.data_type()))); - - // Input shape. If the model supports batching then must include - // '-1' for the batch dimension. - triton::common::TritonJson::Value io_metadata_shape( - metadata, triton::common::TritonJson::ValueType::ARRAY); - if (model_config.max_batch_size() >= 1) { - RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(-1)); - } - for (const auto d : io.dims()) { - RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(d)); - } - RETURN_IF_STATUS_ERROR( - io_metadata.Add("shape", std::move(io_metadata_shape))); - - RETURN_IF_STATUS_ERROR(inputs.Append(std::move(io_metadata))); - } - RETURN_IF_STATUS_ERROR(metadata.Add("inputs", std::move(inputs))); - - triton::common::TritonJson::Value outputs( - metadata, triton::common::TritonJson::ValueType::ARRAY); - for (const auto& io : model_config.output()) { - triton::common::TritonJson::Value io_metadata( - metadata, triton::common::TritonJson::ValueType::OBJECT); - RETURN_IF_STATUS_ERROR(io_metadata.AddStringRef("name", io.name().c_str())); - RETURN_IF_STATUS_ERROR(io_metadata.AddStringRef( - "datatype", triton::common::DataTypeToProtocolString(io.data_type()))); - - // Output shape. If the model supports batching then must include - // '-1' for the batch dimension. 
- triton::common::TritonJson::Value io_metadata_shape( - metadata, triton::common::TritonJson::ValueType::ARRAY); - if (model_config.max_batch_size() >= 1) { - RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(-1)); - } - for (const auto d : io.dims()) { - RETURN_IF_STATUS_ERROR(io_metadata_shape.AppendInt(d)); - } - RETURN_IF_STATUS_ERROR( - io_metadata.Add("shape", std::move(io_metadata_shape))); - - RETURN_IF_STATUS_ERROR(outputs.Append(std::move(io_metadata))); - } - RETURN_IF_STATUS_ERROR(metadata.Add("outputs", std::move(outputs))); - - *model_metadata = reinterpret_cast( - new tc::TritonServerMessage(metadata)); - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelStatistics( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, TRITONSERVER_Message** model_stats) -{ -#ifndef TRITON_ENABLE_STATS - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, "statistics not supported"); -#else - - tc::InferenceServer* lserver = reinterpret_cast(server); - - auto model_name_string = std::string(model_name); - std::map> ready_model_versions; - if (model_name_string.empty()) { - RETURN_IF_STATUS_ERROR(lserver->ModelReadyVersions(&ready_model_versions)); - } else { - std::vector ready_versions; - RETURN_IF_STATUS_ERROR( - lserver->ModelReadyVersions(model_name_string, &ready_versions)); - if (ready_versions.empty()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "requested model '" + model_name_string + "' is not available") - .c_str()); - } - - if (model_version == -1) { - ready_model_versions.emplace( - model_name_string, std::move(ready_versions)); - } else { - bool found = false; - for (const auto v : ready_versions) { - if (v == model_version) { - found = true; - break; - } - } - if (found) { - ready_model_versions.emplace( - model_name_string, std::vector{model_version}); - } else { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "requested model version is not available for model '" + - model_name_string + "'") - .c_str()); - } - } - } - - // Can use string ref in this function because TritonServeMessage - // serializes the json when it is constructed below. - triton::common::TritonJson::Value metadata( - triton::common::TritonJson::ValueType::OBJECT); - - triton::common::TritonJson::Value model_stats_json( - metadata, triton::common::TritonJson::ValueType::ARRAY); - for (const auto& mv_pair : ready_model_versions) { - for (const auto& version : mv_pair.second) { - std::shared_ptr model; - RETURN_IF_STATUS_ERROR(lserver->GetModel(mv_pair.first, version, &model)); - const auto& infer_stats = model->StatsAggregator().ImmutableInferStats(); - const auto& infer_batch_stats = - model->StatsAggregator().ImmutableInferBatchStats(); - - triton::common::TritonJson::Value inference_stats( - metadata, triton::common::TritonJson::ValueType::OBJECT); - // Compute figures only calculated when not going through cache, so - // subtract cache_hit count from success count. Cache hit count will - // simply be 0 when cache is disabled. 
- uint64_t compute_count = - infer_stats.success_count_ - infer_stats.cache_hit_count_; - SetDurationStat( - metadata, inference_stats, "success", infer_stats.success_count_, - infer_stats.request_duration_ns_); - SetDurationStat( - metadata, inference_stats, "fail", infer_stats.failure_count_, - infer_stats.failure_duration_ns_); - SetDurationStat( - metadata, inference_stats, "queue", infer_stats.success_count_, - infer_stats.queue_duration_ns_); - SetDurationStat( - metadata, inference_stats, "compute_input", compute_count, - infer_stats.compute_input_duration_ns_); - SetDurationStat( - metadata, inference_stats, "compute_infer", compute_count, - infer_stats.compute_infer_duration_ns_); - SetDurationStat( - metadata, inference_stats, "compute_output", compute_count, - infer_stats.compute_output_duration_ns_); - SetDurationStat( - metadata, inference_stats, "cache_hit", infer_stats.cache_hit_count_, - infer_stats.cache_hit_lookup_duration_ns_); - // NOTE: cache_miss_count_ should equal compute_count if non-zero - SetDurationStat( - metadata, inference_stats, "cache_miss", - infer_stats.cache_miss_count_, - infer_stats.cache_miss_lookup_duration_ns_ + - infer_stats.cache_miss_insertion_duration_ns_); - - triton::common::TritonJson::Value batch_stats( - metadata, triton::common::TritonJson::ValueType::ARRAY); - for (const auto& batch : infer_batch_stats) { - triton::common::TritonJson::Value batch_stat( - metadata, triton::common::TritonJson::ValueType::OBJECT); - RETURN_IF_STATUS_ERROR(batch_stat.AddUInt("batch_size", batch.first)); - SetDurationStat( - metadata, batch_stat, "compute_input", batch.second.count_, - batch.second.compute_input_duration_ns_); - SetDurationStat( - metadata, batch_stat, "compute_infer", batch.second.count_, - batch.second.compute_infer_duration_ns_); - SetDurationStat( - metadata, batch_stat, "compute_output", batch.second.count_, - batch.second.compute_output_duration_ns_); - RETURN_IF_STATUS_ERROR(batch_stats.Append(std::move(batch_stat))); - } - - triton::common::TritonJson::Value model_stat( - metadata, triton::common::TritonJson::ValueType::OBJECT); - RETURN_IF_STATUS_ERROR( - model_stat.AddStringRef("name", mv_pair.first.c_str())); - RETURN_IF_STATUS_ERROR( - model_stat.AddString("version", std::move(std::to_string(version)))); - - RETURN_IF_STATUS_ERROR(model_stat.AddUInt( - "last_inference", model->StatsAggregator().LastInferenceMs())); - RETURN_IF_STATUS_ERROR(model_stat.AddUInt( - "inference_count", model->StatsAggregator().InferenceCount())); - RETURN_IF_STATUS_ERROR(model_stat.AddUInt( - "execution_count", model->StatsAggregator().ExecutionCount())); - - RETURN_IF_STATUS_ERROR( - model_stat.Add("inference_stats", std::move(inference_stats))); - RETURN_IF_STATUS_ERROR( - model_stat.Add("batch_stats", std::move(batch_stats))); - - RETURN_IF_STATUS_ERROR(model_stats_json.Append(std::move(model_stat))); - } - } - - RETURN_IF_STATUS_ERROR( - metadata.Add("model_stats", std::move(model_stats_json))); - *model_stats = reinterpret_cast( - new tc::TritonServerMessage(metadata)); - - return nullptr; // success - -#endif // TRITON_ENABLE_STATS -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelConfig( - TRITONSERVER_Server* server, const char* model_name, - const int64_t model_version, const uint32_t config_version, - TRITONSERVER_Message** model_config) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - std::shared_ptr model; - RETURN_IF_STATUS_ERROR(lserver->GetModel(model_name, model_version, &model)); - - std::string 
model_config_json; - RETURN_IF_STATUS_ERROR(tc::ModelConfigToJson( - model->Config(), config_version, &model_config_json)); - - *model_config = reinterpret_cast( - new tc::TritonServerMessage(std::move(model_config_json))); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerModelIndex( - TRITONSERVER_Server* server, uint32_t flags, - TRITONSERVER_Message** repository_index) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - const bool ready_only = ((flags & TRITONSERVER_INDEX_FLAG_READY) != 0); - - std::vector index; - RETURN_IF_STATUS_ERROR(lserver->RepositoryIndex(ready_only, &index)); - - // Can use string ref in this function because TritonServerMessage - // serializes the json when it is constructed below. - triton::common::TritonJson::Value repository_index_json( - triton::common::TritonJson::ValueType::ARRAY); - - for (const auto& in : index) { - triton::common::TritonJson::Value model_index( - repository_index_json, triton::common::TritonJson::ValueType::OBJECT); - RETURN_IF_STATUS_ERROR(model_index.AddStringRef("name", in.name_.c_str())); - if (!in.name_only_) { - if (in.version_ >= 0) { - RETURN_IF_STATUS_ERROR(model_index.AddString( - "version", std::move(std::to_string(in.version_)))); - } - RETURN_IF_STATUS_ERROR(model_index.AddStringRef( - "state", tc::ModelReadyStateString(in.state_).c_str())); - if (!in.reason_.empty()) { - RETURN_IF_STATUS_ERROR( - model_index.AddStringRef("reason", in.reason_.c_str())); - } - } - - RETURN_IF_STATUS_ERROR( - repository_index_json.Append(std::move(model_index))); - } - - *repository_index = reinterpret_cast( - new tc::TritonServerMessage(repository_index_json)); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerLoadModel( - TRITONSERVER_Server* server, const char* model_name) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - RETURN_IF_STATUS_ERROR(lserver->LoadModel({{std::string(model_name), {}}})); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerLoadModelWithParameters( - TRITONSERVER_Server* server, const char* model_name, - const TRITONSERVER_Parameter** parameters, const uint64_t parameter_count) -{ - if ((parameters == nullptr) && (parameter_count != 0)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "load parameters are not provided while parameter count is non-zero"); - } - - tc::InferenceServer* lserver = reinterpret_cast(server); - - std::unordered_map> - models; - std::vector mp; - for (size_t i = 0; i < parameter_count; ++i) { - mp.emplace_back( - reinterpret_cast(parameters[i])); - } - models[model_name] = std::move(mp); - RETURN_IF_STATUS_ERROR(lserver->LoadModel(models)); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerUnloadModel( - TRITONSERVER_Server* server, const char* model_name) -{ - tc::InferenceServer* lserver = reinterpret_cast(server); - - RETURN_IF_STATUS_ERROR(lserver->UnloadModel( - std::string(model_name), false /* unload_dependents */)); - - return nullptr; // success -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* -TRITONSERVER_ServerUnloadModelAndDependents( - TRITONSERVER_Server* server, const char* model_name) -{ - { - tc::InferenceServer* lserver = - reinterpret_cast(server); - - RETURN_IF_STATUS_ERROR(lserver->UnloadModel( - std::string(model_name), true /* unload_dependents */)); - - return nullptr; // success - } -} - -TRITONAPI_DECLSPEC TRITONSERVER_Error* 
-TRITONSERVER_ServerMetrics(
-    TRITONSERVER_Server* server, TRITONSERVER_Metrics** metrics)
-{
-#ifdef TRITON_ENABLE_METRICS
-  TritonServerMetrics* lmetrics = new TritonServerMetrics();
-  *metrics = reinterpret_cast<TRITONSERVER_Metrics*>(lmetrics);
-  return nullptr;  // Success
-#else
-  *metrics = nullptr;
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONAPI_DECLSPEC TRITONSERVER_Error*
-TRITONSERVER_ServerInferAsync(
-    TRITONSERVER_Server* server,
-    TRITONSERVER_InferenceRequest* inference_request,
-    TRITONSERVER_InferenceTrace* trace)
-{
-  tc::InferenceServer* lserver = reinterpret_cast<tc::InferenceServer*>(server);
-  tc::InferenceRequest* lrequest =
-      reinterpret_cast<tc::InferenceRequest*>(inference_request);
-
-  RETURN_IF_STATUS_ERROR(lrequest->PrepareForInference());
-
-  // Set the trace object in the request so that activity associated
-  // with the request can be recorded as the request flows through
-  // Triton.
-  if (trace != nullptr) {
-#ifdef TRITON_ENABLE_TRACING
-    tc::InferenceTrace* ltrace = reinterpret_cast<tc::InferenceTrace*>(trace);
-    ltrace->SetModelName(lrequest->ModelName());
-    ltrace->SetModelVersion(lrequest->ActualModelVersion());
-
-    lrequest->SetTrace(std::make_shared<tc::InferenceTraceProxy>(ltrace));
-#else
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_UNSUPPORTED, "inference tracing not supported");
-#endif  // TRITON_ENABLE_TRACING
-  }
-
-  // We wrap the request in a unique pointer to ensure that it flows
-  // through inferencing with clear ownership.
-  std::unique_ptr<tc::InferenceRequest> ureq(lrequest);
-
-  // Run inference...
-  tc::Status status = lserver->InferAsync(ureq);
-
-  // If there is an error then must explicitly release any trace
-  // object associated with the inference request above.
-#ifdef TRITON_ENABLE_TRACING
-  if (!status.IsOk()) {
-    ureq->ReleaseTrace();
-  }
-#endif  // TRITON_ENABLE_TRACING
-
-  // If there is an error then ureq will still have 'lrequest' and we
-  // must release it from unique_ptr since the caller should retain
-  // ownership when there is error. If there is not an error then ureq
-  // == nullptr and so this release is a nop.
-  ureq.release();
-
-  RETURN_IF_STATUS_ERROR(status);
-  return nullptr;  // Success
-}
-
-//
-// TRITONSERVER_MetricFamily
-//
-TRITONSERVER_Error*
-TRITONSERVER_MetricFamilyNew(
-    TRITONSERVER_MetricFamily** family, TRITONSERVER_MetricKind kind,
-    const char* name, const char* description)
-{
-#ifdef TRITON_ENABLE_METRICS
-  try {
-    *family = reinterpret_cast<TRITONSERVER_MetricFamily*>(
-        new tc::MetricFamily(kind, name, description));
-  }
-  catch (std::invalid_argument const& ex) {
-    // Catch invalid kinds passed to constructor
-    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, ex.what());
-  }
-  return nullptr;  // Success
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONSERVER_Error*
-TRITONSERVER_MetricFamilyDelete(TRITONSERVER_MetricFamily* family)
-{
-#ifdef TRITON_ENABLE_METRICS
-  auto lfamily = reinterpret_cast<tc::MetricFamily*>(family);
-  if (lfamily->NumMetrics() > 0) {
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_INTERNAL,
-        "Must call MetricDelete on all dependent metrics before calling "
-        "MetricFamilyDelete.");
-  }
-
-  delete lfamily;
-  return nullptr;  // Success
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-//
-// TRITONSERVER_Metric
-//
-TRITONSERVER_Error*
-TRITONSERVER_MetricNew(
-    TRITONSERVER_Metric** metric, TRITONSERVER_MetricFamily* family,
-    const TRITONSERVER_Parameter** labels, const uint64_t label_count)
-{
-#ifdef TRITON_ENABLE_METRICS
-  std::vector<const tc::InferenceParameter*> labels_vec;
-  for (size_t i = 0; i < label_count; i++) {
-    labels_vec.emplace_back(
-        reinterpret_cast<const tc::InferenceParameter*>(labels[i]));
-  }
-
-  try {
-    *metric = reinterpret_cast<TRITONSERVER_Metric*>(
-        new tc::Metric(family, labels_vec));
-  }
-  catch (std::invalid_argument const& ex) {
-    // Catch invalid kinds passed to constructor
-    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INVALID_ARG, ex.what());
-  }
-
-  return nullptr;  // Success
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONSERVER_Error*
-TRITONSERVER_MetricDelete(TRITONSERVER_Metric* metric)
-{
-#ifdef TRITON_ENABLE_METRICS
-  auto lmetric = reinterpret_cast<tc::Metric*>(metric);
-  if (lmetric->Family() == nullptr) {
-    return TRITONSERVER_ErrorNew(
-        TRITONSERVER_ERROR_INTERNAL,
-        "MetricFamily reference was invalidated before Metric was deleted. "
-        "Must call MetricDelete on all dependent metrics before calling "
-        "MetricFamilyDelete.");
-  }
-
-  delete lmetric;
-  return nullptr;  // success
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONSERVER_Error*
-TRITONSERVER_MetricValue(TRITONSERVER_Metric* metric, double* value)
-{
-#ifdef TRITON_ENABLE_METRICS
-  return reinterpret_cast<tc::Metric*>(metric)->Value(value);
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONSERVER_Error*
-TRITONSERVER_MetricIncrement(TRITONSERVER_Metric* metric, double value)
-{
-#ifdef TRITON_ENABLE_METRICS
-  return reinterpret_cast<tc::Metric*>(metric)->Increment(value);
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONSERVER_Error*
-TRITONSERVER_MetricSet(TRITONSERVER_Metric* metric, double value)
-{
-#ifdef TRITON_ENABLE_METRICS
-  return reinterpret_cast<tc::Metric*>(metric)->Set(value);
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-TRITONSERVER_Error*
-TRITONSERVER_GetMetricKind(
-    TRITONSERVER_Metric* metric, TRITONSERVER_MetricKind* kind)
-{
-#ifdef TRITON_ENABLE_METRICS
-  *kind = reinterpret_cast<tc::Metric*>(metric)->Kind();
-  return nullptr;  // Success
-#else
-  return TRITONSERVER_ErrorNew(
-      TRITONSERVER_ERROR_UNSUPPORTED, "metrics not supported");
-#endif  // TRITON_ENABLE_METRICS
-}
-
-}  // extern C
diff --git a/3rdparty/core-r22.12/src/tritonserver_apis.h b/3rdparty/core-r22.12/src/tritonserver_apis.h
deleted file mode 100644
index 5f2af2dc76a74b4d6af9453eb98570868f0677b4..0000000000000000000000000000000000000000
--- a/3rdparty/core-r22.12/src/tritonserver_apis.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-//  * Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-//  * Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//  * Neither the name of NVIDIA CORPORATION nor the names of its
-//    contributors may be used to endorse or promote products derived
-//    from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#pragma once - -#define _COMPILING_TRITONSERVER 1 -#define _COMPILING_TRITONBACKEND 1 -#define _COMPILING_TRITONREPOAGENT 1 - -#include "triton/core/tritonbackend.h" -#include "triton/core/tritonrepoagent.h" -#include "triton/core/tritonserver.h" - -#undef _COMPILING_TRITONSERVER -#undef _COMPILING_TRITONBACKEND -#undef _COMPILING_TRITONREPOAGENT diff --git a/3rdparty/core-r22.12/src/tritonserver_stub.cc b/3rdparty/core-r22.12/src/tritonserver_stub.cc deleted file mode 100644 index 402fb8313ac6ef31c0d7a1e4e008a53848fe0c36..0000000000000000000000000000000000000000 --- a/3rdparty/core-r22.12/src/tritonserver_stub.cc +++ /dev/null @@ -1,960 +0,0 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES{} LOSS OF USE, DATA, OR -// PROFITS{} OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#if defined(_MSC_VER) -#define TRITONAPI_DECLSPEC __declspec(dllexport) -#elif defined(__GNUC__) -#define TRITONAPI_DECLSPEC __attribute__((__visibility__("default"))) -#else -#define TRITONAPI_DECLSPEC -#endif - -extern "C" { -TRITONAPI_DECLSPEC void -TRITONSERVER_ApiVersion() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_DataTypeString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_StringToDataType() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_DataTypeByteSize() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_MemoryTypeString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ParameterTypeString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ParameterNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ParameterBytesNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ParameterDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InstanceGroupKindString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_LogIsEnabled() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_LogMessage() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ErrorNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ErrorDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ErrorCode() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ErrorCodeString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ErrorMessage() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ResponseAllocatorNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ResponseAllocatorSetQueryFunction() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ResponseAllocatorDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_MessageNewFromSerializedJson() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_MessageDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_MessageSerializeToJson() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricsDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricsFormatted() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceLevelString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceActivityString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceTensorNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceParentId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceModelName() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceTraceModelVersion() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestFlags() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetFlags() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestCorrelationId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestCorrelationIdString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetCorrelationId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetCorrelationIdString() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestPriority() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetPriority() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestTimeoutMicroseconds() -{ -} -TRITONAPI_DECLSPEC void 
-TRITONSERVER_InferenceRequestSetTimeoutMicroseconds() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestAddInput() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestAddRawInput() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestRemoveInput() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestRemoveAllInputs() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestAppendInputData() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestRemoveAllInputData() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestAddRequestedOutput() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestRemoveRequestedOutput() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetReleaseCallback() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestSetResponseCallback() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseError() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseModel() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseParameterCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseParameter() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseOutputCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseOutput() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceResponseOutputClassificationLabel() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetServerId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetModelRepositoryPath() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetModelControlMode() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetStartupModel() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetStrictModelConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetRateLimiterMode() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsAddRateLimiterResource() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetResponseCacheByteSize() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetExitOnError() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetStrictReadiness() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetExitTimeout() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetBufferManagerThreadCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetModelLoadThreadCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetLogFile() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetLogInfo() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetLogWarn() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetLogError() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetLogVerbose() -{ -} -TRITONAPI_DECLSPEC void 
-TRITONSERVER_ServerOptionsSetLogFormat() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetMetrics() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetGpuMetrics() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetCpuMetrics() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetMetricsInterval() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetBackendDirectory() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetRepoAgentDirectory() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetBackendConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerOptionsSetHostPolicy() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesSetMemoryTypeId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesSetMemoryType() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesSetCudaIpcHandle() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesSetByteSize() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesMemoryTypeId() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesMemoryType() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesCudaIpcHandle() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_BufferAttributesByteSize() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerStop() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerPollModelRepository() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerIsLive() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerIsReady() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelIsReady() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelBatchProperties() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelTransactionProperties() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerMetadata() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelMetadata() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelStatistics() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerModelIndex() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerRegisterModelRepository() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerUnregisterModelRepository() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerLoadModel() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerLoadModelWithParameters() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerUnloadModel() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerUnloadModelAndDependents() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerMetrics() -{ -} -TRITONAPI_DECLSPEC void -TRITONSERVER_ServerInferAsync() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ApiVersion() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_MemoryManagerAllocate() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_MemoryManagerFree() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_InputProperties() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_InputPropertiesForHostPolicy() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_InputBuffer() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_InputBufferForHostPolicy() -{ -} 
-TRITONAPI_DECLSPEC void -TRITONBACKEND_InputBufferAttributes() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_OutputBuffer() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_OutputBufferAttributes() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestId() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestCorrelationId() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestCorrelationIdString() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestFlags() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestInputCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestInputName() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestInput() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestInputByIndex() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestOutputCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestOutputName() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestOutputBufferProperties() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_RequestRelease() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseFactoryNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseFactoryDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseFactorySendFlags() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseNewFromFactory() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseDelete() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseSetStringParameter() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseSetIntParameter() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseSetBoolParameter() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseOutput() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ResponseSend() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_StateNew() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_StateUpdate() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_StateBuffer() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_StateBufferAttributes() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendName() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendExecutionPolicy() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendSetExecutionPolicy() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendArtifacts() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendMemoryManager() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendState() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendSetState() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelName() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelVersion() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelRepository() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelAutoCompleteConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelSetConfig() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelServer() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelBackend() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelState() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelSetState() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceName() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceKind() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceDeviceId() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceHostPolicy() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceIsPassive() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceProfileCount() 
-{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceProfileName() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceSecondaryDeviceCount() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceSecondaryDeviceProperties() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceModel() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceState() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceSetState() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceReportStatistics() -{ -} -TRITONAPI_DECLSPEC void -TRITONBACKEND_ModelInstanceReportBatchStatistics() -{ -} -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ApiVersion() -{ -} -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelRepositoryLocation() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelRepositoryLocationAcquire() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelRepositoryLocationRelease() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelRepositoryUpdate() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelParameterCount() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelParameter() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelConfig() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelState() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_ModelSetState() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_State() -{ -} - -TRITONAPI_DECLSPEC void -TRITONREPOAGENT_SetState() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricFamilyNew() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricFamilyDelete() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricNew() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricDelete() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricValue() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricIncrement() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_MetricSet() -{ -} - -TRITONAPI_DECLSPEC void -TRITONSERVER_GetMetricKind() -{ -} - -TRITONAPI_DECLSPEC void -TRITONBACKEND_BackendAttributeAddPreferredInstanceGroup() -{ -} - -} /* extern "C" */ diff --git a/3rdparty/googletest-1.13.0/.clang-format b/3rdparty/googletest-1.13.0/.clang-format deleted file mode 100644 index 5b9bfe6d224232981ada90cee232c716afbdf09d..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/.clang-format +++ /dev/null @@ -1,4 +0,0 @@ -# Run manually to reformat a file: -# clang-format -i --style=file -Language: Cpp -BasedOnStyle: Google diff --git a/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/00-bug_report.yml b/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/00-bug_report.yml deleted file mode 100644 index 586779ad2d618299eff5b68f9f8a3da6013934b8..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/00-bug_report.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Bug Report -description: Let us know that something does not work as expected. -title: "[Bug]: Please title this bug report" -body: - - type: textarea - id: what-happened - attributes: - label: Describe the issue - description: What happened, and what did you expect to happen? - validations: - required: true - - type: textarea - id: steps - attributes: - label: Steps to reproduce the problem - description: It is important that we are able to reproduce the problem that you are experiencing. Please provide all code and relevant steps to reproduce the problem, including your `BUILD`/`CMakeLists.txt` file and build commands. 
Links to a GitHub branch or [godbolt.org](https://godbolt.org/) that demonstrate the problem are also helpful. - validations: - required: true - - type: textarea - id: version - attributes: - label: What version of GoogleTest are you using? - description: Please include the output of `git rev-parse HEAD` or the GoogleTest release version number that you are using. - validations: - required: true - - type: textarea - id: os - attributes: - label: What operating system and version are you using? - description: If you are using a Linux distribution please include the name and version of the distribution as well. - validations: - required: true - - type: textarea - id: compiler - attributes: - label: What compiler and version are you using? - description: Please include the output of `gcc -v` or `clang -v`, or the equivalent for your compiler. - validations: - required: true - - type: textarea - id: buildsystem - attributes: - label: What build system are you using? - description: Please include the output of `bazel --version` or `cmake --version`, or the equivalent for your build system. - validations: - required: true - - type: textarea - id: additional - attributes: - label: Additional context - description: Add any other context about the problem here. - validations: - required: false diff --git a/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/10-feature_request.yml b/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/10-feature_request.yml deleted file mode 100644 index 91ad0417702e1971fab9e8d57df9a3efbde9240d..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/10-feature_request.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Feature request -description: Propose a new feature. -title: "[FR]: Please title this feature request" -labels: "enhancement" -body: - - type: textarea - id: version - attributes: - label: Does the feature exist in the most recent commit? - description: We recommend using the latest commit from GitHub in your projects. - validations: - required: true - - type: textarea - id: why - attributes: - label: Why do we need this feature? - description: Ideally, explain why a combination of existing features cannot be used instead. - validations: - required: true - - type: textarea - id: proposal - attributes: - label: Describe the proposal. - description: Include a detailed description of the feature, with usage examples. - validations: - required: true - - type: textarea - id: platform - attributes: - label: Is the feature specific to an operating system, compiler, or build system version? - description: If it is, please specify which versions. - validations: - required: true diff --git a/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/config.yml b/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 65170d10a78231455bed85e929bc008927445644..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,5 +0,0 @@ -blank_issues_enabled: false -contact_links: - - name: Get Help - url: https://github.com/google/googletest/discussions - about: Please ask and answer questions here. 
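The bug-report template above asks for a complete, self-contained reproduction, including the test source, the `BUILD`/`CMakeLists.txt` file, and the exact build commands. As a rough illustration only (the file name and the trivial assertion below are invented placeholders, not taken from the template), the attached test source can be as small as:

```cpp
// repro_test.cc -- a minimal GoogleTest case attached to a bug report.
// Link against gtest_main so no main() is needed. The assertion is
// deliberately trivial; a real report would keep only the smallest test
// that still triggers the failure being described.
#include "gtest/gtest.h"

TEST(ReproTest, DemonstratesReportedBehavior) {
  EXPECT_EQ(2 + 2, 4);
}
```

together with the build file and the exact build/test command used, so maintainers can run it unchanged.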
diff --git a/3rdparty/googletest-1.13.0/.github/workflows/gtest-ci.yml b/3rdparty/googletest-1.13.0/.github/workflows/gtest-ci.yml deleted file mode 100644 index 03a8cc5e287b47bacc3b9ae7dc0d5b966cf4debe..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/.github/workflows/gtest-ci.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: ci - -on: - push: - pull_request: - -env: - BAZEL_CXXOPTS: -std=c++14 - -jobs: - Linux: - runs-on: ubuntu-latest - steps: - - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Tests - run: bazel test --cxxopt=-std=c++14 --features=external_include_paths --test_output=errors ... - - macOS: - runs-on: macos-latest - steps: - - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Tests - run: bazel test --cxxopt=-std=c++14 --features=external_include_paths --test_output=errors ... - - - Windows: - runs-on: windows-latest - steps: - - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - - name: Tests - run: bazel test --cxxopt=/std:c++14 --features=external_include_paths --test_output=errors ... diff --git a/3rdparty/googletest-1.13.0/.gitignore b/3rdparty/googletest-1.13.0/.gitignore deleted file mode 100644 index f08cb72a33cd199478f41be1bd487f916330472c..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/.gitignore +++ /dev/null @@ -1,84 +0,0 @@ -# Ignore CI build directory -build/ -xcuserdata -cmake-build-debug/ -.idea/ -bazel-bin -bazel-genfiles -bazel-googletest -bazel-out -bazel-testlogs -# python -*.pyc - -# Visual Studio files -.vs -*.sdf -*.opensdf -*.VC.opendb -*.suo -*.user -_ReSharper.Caches/ -Win32-Debug/ -Win32-Release/ -x64-Debug/ -x64-Release/ - -# Ignore autoconf / automake files -Makefile.in -aclocal.m4 -configure -build-aux/ -autom4te.cache/ -googletest/m4/libtool.m4 -googletest/m4/ltoptions.m4 -googletest/m4/ltsugar.m4 -googletest/m4/ltversion.m4 -googletest/m4/lt~obsolete.m4 -googlemock/m4 - -# Ignore generated directories. -googlemock/fused-src/ -googletest/fused-src/ - -# macOS files -.DS_Store -googletest/.DS_Store -googletest/xcode/.DS_Store - -# Ignore cmake generated directories and files. -CMakeFiles -CTestTestfile.cmake -Makefile -cmake_install.cmake -googlemock/CMakeFiles -googlemock/CTestTestfile.cmake -googlemock/Makefile -googlemock/cmake_install.cmake -googlemock/gtest -/bin -/googlemock/gmock.dir -/googlemock/gmock_main.dir -/googlemock/RUN_TESTS.vcxproj.filters -/googlemock/RUN_TESTS.vcxproj -/googlemock/INSTALL.vcxproj.filters -/googlemock/INSTALL.vcxproj -/googlemock/gmock_main.vcxproj.filters -/googlemock/gmock_main.vcxproj -/googlemock/gmock.vcxproj.filters -/googlemock/gmock.vcxproj -/googlemock/gmock.sln -/googlemock/ALL_BUILD.vcxproj.filters -/googlemock/ALL_BUILD.vcxproj -/lib -/Win32 -/ZERO_CHECK.vcxproj.filters -/ZERO_CHECK.vcxproj -/RUN_TESTS.vcxproj.filters -/RUN_TESTS.vcxproj -/INSTALL.vcxproj.filters -/INSTALL.vcxproj -/googletest-distribution.sln -/CMakeCache.txt -/ALL_BUILD.vcxproj.filters -/ALL_BUILD.vcxproj diff --git a/3rdparty/googletest-1.13.0/BUILD.bazel b/3rdparty/googletest-1.13.0/BUILD.bazel deleted file mode 100644 index ac62251e10172614d93b385777e52defc187cac4..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/BUILD.bazel +++ /dev/null @@ -1,218 +0,0 @@ -# Copyright 2017 Google Inc. -# All Rights Reserved. 
-# -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Bazel Build for Google C++ Testing Framework(Google Test) - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) - -exports_files(["LICENSE"]) - -config_setting( - name = "qnx", - constraint_values = ["@platforms//os:qnx"], -) - -config_setting( - name = "windows", - constraint_values = ["@platforms//os:windows"], -) - -config_setting( - name = "freebsd", - constraint_values = ["@platforms//os:freebsd"], -) - -config_setting( - name = "openbsd", - constraint_values = ["@platforms//os:openbsd"], -) - -config_setting( - name = "msvc_compiler", - flag_values = { - "@bazel_tools//tools/cpp:compiler": "msvc-cl", - }, - visibility = [":__subpackages__"], -) - -config_setting( - name = "has_absl", - values = {"define": "absl=1"}, -) - -# Library that defines the FRIEND_TEST macro. 
-cc_library( - name = "gtest_prod", - hdrs = ["googletest/include/gtest/gtest_prod.h"], - includes = ["googletest/include"], -) - -# Google Test including Google Mock -cc_library( - name = "gtest", - srcs = glob( - include = [ - "googletest/src/*.cc", - "googletest/src/*.h", - "googletest/include/gtest/**/*.h", - "googlemock/src/*.cc", - "googlemock/include/gmock/**/*.h", - ], - exclude = [ - "googletest/src/gtest-all.cc", - "googletest/src/gtest_main.cc", - "googlemock/src/gmock-all.cc", - "googlemock/src/gmock_main.cc", - ], - ), - hdrs = glob([ - "googletest/include/gtest/*.h", - "googlemock/include/gmock/*.h", - ]), - copts = select({ - ":qnx": [], - ":windows": [], - "//conditions:default": ["-pthread"], - }), - defines = select({ - ":has_absl": ["GTEST_HAS_ABSL=1"], - "//conditions:default": [], - }), - features = select({ - ":windows": ["windows_export_all_symbols"], - "//conditions:default": [], - }), - includes = [ - "googlemock", - "googlemock/include", - "googletest", - "googletest/include", - ], - linkopts = select({ - ":qnx": ["-lregex"], - ":windows": [], - ":freebsd": [ - "-lm", - "-pthread", - ], - ":openbsd": [ - "-lm", - "-pthread", - ], - "//conditions:default": ["-pthread"], - }), - deps = select({ - ":has_absl": [ - "@com_google_absl//absl/debugging:failure_signal_handler", - "@com_google_absl//absl/debugging:stacktrace", - "@com_google_absl//absl/debugging:symbolize", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/flags:parse", - "@com_google_absl//absl/flags:reflection", - "@com_google_absl//absl/flags:usage", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:any", - "@com_google_absl//absl/types:optional", - "@com_google_absl//absl/types:variant", - "@com_googlesource_code_re2//:re2", - ], - "//conditions:default": [], - }), -) - -cc_library( - name = "gtest_main", - srcs = ["googlemock/src/gmock_main.cc"], - features = select({ - ":windows": ["windows_export_all_symbols"], - "//conditions:default": [], - }), - deps = [":gtest"], -) - -# The following rules build samples of how to use gTest. 
-cc_library( - name = "gtest_sample_lib", - srcs = [ - "googletest/samples/sample1.cc", - "googletest/samples/sample2.cc", - "googletest/samples/sample4.cc", - ], - hdrs = [ - "googletest/samples/prime_tables.h", - "googletest/samples/sample1.h", - "googletest/samples/sample2.h", - "googletest/samples/sample3-inl.h", - "googletest/samples/sample4.h", - ], - features = select({ - ":windows": ["windows_export_all_symbols"], - "//conditions:default": [], - }), -) - -cc_test( - name = "gtest_samples", - size = "small", - # All Samples except: - # sample9 (main) - # sample10 (main and takes a command line option and needs to be separate) - srcs = [ - "googletest/samples/sample1_unittest.cc", - "googletest/samples/sample2_unittest.cc", - "googletest/samples/sample3_unittest.cc", - "googletest/samples/sample4_unittest.cc", - "googletest/samples/sample5_unittest.cc", - "googletest/samples/sample6_unittest.cc", - "googletest/samples/sample7_unittest.cc", - "googletest/samples/sample8_unittest.cc", - ], - linkstatic = 0, - deps = [ - "gtest_sample_lib", - ":gtest_main", - ], -) - -cc_test( - name = "sample9_unittest", - size = "small", - srcs = ["googletest/samples/sample9_unittest.cc"], - deps = [":gtest"], -) - -cc_test( - name = "sample10_unittest", - size = "small", - srcs = ["googletest/samples/sample10_unittest.cc"], - deps = [":gtest"], -) diff --git a/3rdparty/googletest-1.13.0/CMakeLists.txt b/3rdparty/googletest-1.13.0/CMakeLists.txt deleted file mode 100644 index 6af414371f7a1c038d5834dc8707a6ac34bed7b5..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/CMakeLists.txt +++ /dev/null @@ -1,39 +0,0 @@ -# Note: CMake support is community-based. The maintainers do not use CMake -# internally. - -cmake_minimum_required(VERSION 3.5) - -if (POLICY CMP0048) - cmake_policy(SET CMP0048 NEW) -endif (POLICY CMP0048) - -if (POLICY CMP0069) - cmake_policy(SET CMP0069 NEW) -endif (POLICY CMP0069) - -if (POLICY CMP0077) - cmake_policy(SET CMP0077 NEW) -endif (POLICY CMP0077) - -project(googletest-distribution) -set(GOOGLETEST_VERSION 1.13.0) - -if(NOT CYGWIN AND NOT MSYS AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL QNX) - set(CMAKE_CXX_EXTENSIONS OFF) -endif() - -enable_testing() - -include(CMakeDependentOption) -include(GNUInstallDirs) - -#Note that googlemock target already builds googletest -option(BUILD_GMOCK "Builds the googlemock subproject" ON) -option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" ON) -option(GTEST_HAS_ABSL "Use Abseil and RE2. Requires Abseil and RE2 to be separately added to the build." OFF) - -if(BUILD_GMOCK) - add_subdirectory( googlemock ) -else() - add_subdirectory( googletest ) -endif() diff --git a/3rdparty/googletest-1.13.0/CONTRIBUTING.md b/3rdparty/googletest-1.13.0/CONTRIBUTING.md deleted file mode 100644 index de14c8159b17e367dd0098c7202e13439a06ff89..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/CONTRIBUTING.md +++ /dev/null @@ -1,131 +0,0 @@ -# How to become a contributor and submit your own code - -## Contributor License Agreements - -We'd love to accept your patches! Before we can take them, we have to jump a -couple of legal hurdles. - -Please fill out either the individual or corporate Contributor License Agreement -(CLA). - -* If you are an individual writing original source code and you're sure you - own the intellectual property, then you'll need to sign an - [individual CLA](https://developers.google.com/open-source/cla/individual). 
-* If you work for a company that wants to allow you to contribute your work, - then you'll need to sign a - [corporate CLA](https://developers.google.com/open-source/cla/corporate). - -Follow either of the two links above to access the appropriate CLA and -instructions for how to sign and return it. Once we receive it, we'll be able to -accept your pull requests. - -## Are you a Googler? - -If you are a Googler, please make an attempt to submit an internal contribution -rather than a GitHub Pull Request. If you are not able to submit internally, a -PR is acceptable as an alternative. - -## Contributing A Patch - -1. Submit an issue describing your proposed change to the - [issue tracker](https://github.com/google/googletest/issues). -2. Please don't mix more than one logical change per submittal, because it - makes the history hard to follow. If you want to make a change that doesn't - have a corresponding issue in the issue tracker, please create one. -3. Also, coordinate with team members that are listed on the issue in question. - This ensures that work isn't being duplicated and communicating your plan - early also generally leads to better patches. -4. If your proposed change is accepted, and you haven't already done so, sign a - Contributor License Agreement - ([see details above](#contributor-license-agreements)). -5. Fork the desired repo, develop and test your code changes. -6. Ensure that your code adheres to the existing style in the sample to which - you are contributing. -7. Ensure that your code has an appropriate set of unit tests which all pass. -8. Submit a pull request. - -## The Google Test and Google Mock Communities - -The Google Test community exists primarily through the -[discussion group](http://groups.google.com/group/googletestframework) and the -GitHub repository. Likewise, the Google Mock community exists primarily through -their own [discussion group](http://groups.google.com/group/googlemock). You are -definitely encouraged to contribute to the discussion and you can also help us -to keep the effectiveness of the group high by following and promoting the -guidelines listed here. - -### Please Be Friendly - -Showing courtesy and respect to others is a vital part of the Google culture, -and we strongly encourage everyone participating in Google Test development to -join us in accepting nothing less. Of course, being courteous is not the same as -failing to constructively disagree with each other, but it does mean that we -should be respectful of each other when enumerating the 42 technical reasons -that a particular proposal may not be the best choice. There's never a reason to -be antagonistic or dismissive toward anyone who is sincerely trying to -contribute to a discussion. - -Sure, C++ testing is serious business and all that, but it's also a lot of fun. -Let's keep it that way. Let's strive to be one of the friendliest communities in -all of open source. - -As always, discuss Google Test in the official GoogleTest discussion group. You -don't have to actually submit code in order to sign up. Your participation -itself is a valuable contribution. - -## Style - -To keep the source consistent, readable, diffable and easy to merge, we use a -fairly rigid coding style, as defined by the -[google-styleguide](https://github.com/google/styleguide) project. All patches -will be expected to conform to the style outlined -[here](https://google.github.io/styleguide/cppguide.html). 
Use -[.clang-format](https://github.com/google/googletest/blob/main/.clang-format) to -check your formatting. - -## Requirements for Contributors - -If you plan to contribute a patch, you need to build Google Test, Google Mock, -and their own tests from a git checkout, which has further requirements: - -* [Python](https://www.python.org/) v2.3 or newer (for running some of the - tests and re-generating certain source files from templates) -* [CMake](https://cmake.org/) v2.8.12 or newer - -## Developing Google Test and Google Mock - -This section discusses how to make your own changes to the Google Test project. - -### Testing Google Test and Google Mock Themselves - -To make sure your changes work as intended and don't break existing -functionality, you'll want to compile and run Google Test and GoogleMock's own -tests. For that you can use CMake: - - mkdir mybuild - cd mybuild - cmake -Dgtest_build_tests=ON -Dgmock_build_tests=ON ${GTEST_REPO_DIR} - -To choose between building only Google Test or Google Mock, you may modify your -cmake command to be one of each - - cmake -Dgtest_build_tests=ON ${GTEST_DIR} # sets up Google Test tests - cmake -Dgmock_build_tests=ON ${GMOCK_DIR} # sets up Google Mock tests - -Make sure you have Python installed, as some of Google Test's tests are written -in Python. If the cmake command complains about not being able to find Python -(`Could NOT find PythonInterp (missing: PYTHON_EXECUTABLE)`), try telling it -explicitly where your Python executable can be found: - - cmake -DPYTHON_EXECUTABLE=path/to/python ... - -Next, you can build Google Test and / or Google Mock and all desired tests. On -\*nix, this is usually done by - - make - -To run the tests, do - - make test - -All tests should pass. diff --git a/3rdparty/googletest-1.13.0/CONTRIBUTORS b/3rdparty/googletest-1.13.0/CONTRIBUTORS deleted file mode 100644 index 77397a5b53fea5352f8af38bdb1c4a0ce0e30d66..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/CONTRIBUTORS +++ /dev/null @@ -1,65 +0,0 @@ -# This file contains a list of people who've made non-trivial -# contribution to the Google C++ Testing Framework project. People -# who commit code to the project are encouraged to add their names -# here. Please keep the list sorted by first names. - -Ajay Joshi -BalΓ‘zs DΓ‘n -Benoit Sigoure -Bharat Mediratta -Bogdan Piloca -Chandler Carruth -Chris Prince -Chris Taylor -Dan Egnor -Dave MacLachlan -David Anderson -Dean Sturtevant -Eric Roman -Gene Volovich -Hady Zalek -Hal Burch -Jeffrey Yasskin -Jim Keller -Joe Walnes -Jon Wray -JΓ³i SigurΓ°sson -Keir Mierle -Keith Ray -Kenton Varda -Kostya Serebryany -Krystian Kuzniarek -Lev Makhlis -Manuel Klimek -Mario Tanev -Mark Paskin -Markus Heule -Martijn Vels -Matthew Simmons -Mika Raento -Mike Bland -MiklΓ³s Fazekas -Neal Norwitz -Nermin Ozkiranartli -Owen Carlsen -Paneendra Ba -Pasi Valminen -Patrick Hanna -Patrick Riley -Paul Menage -Peter Kaminski -Piotr Kaminski -Preston Jackson -Rainer Klaffenboeck -Russ Cox -Russ Rufer -Sean Mcafee -SigurΓ°ur Ásgeirsson -Sverre Sundsdal -Szymon Sobik -Takeshi Yoshino -Tracy Bialik -Vadim Berman -Vlad Losev -Wolfgang Klier -Zhanyong Wan diff --git a/3rdparty/googletest-1.13.0/LICENSE b/3rdparty/googletest-1.13.0/LICENSE deleted file mode 100644 index 1941a11f8ce94389160b458927a29ba217542818..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright 2008, Google Inc. -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/3rdparty/googletest-1.13.0/README.md b/3rdparty/googletest-1.13.0/README.md deleted file mode 100644 index cd89abb2d2daea7d0feef40f2496106a9bfd31f4..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/README.md +++ /dev/null @@ -1,117 +0,0 @@ -# GoogleTest - -### Announcements - -#### Live at Head - -GoogleTest now follows the -[Abseil Live at Head philosophy](https://abseil.io/about/philosophy#upgrade-support). -We recommend -[updating to the latest commit in the `main` branch as often as possible](https://github.com/abseil/abseil-cpp/blob/master/FAQ.md#what-is-live-at-head-and-how-do-i-do-it). - -#### Documentation Updates - -Our documentation is now live on GitHub Pages at -https://google.github.io/googletest/. We recommend browsing the documentation on -GitHub Pages rather than directly in the repository. - -#### Release 1.12.1 - -[Release 1.12.1](https://github.com/google/googletest/releases/tag/release-1.12.1) -is now available. - -The 1.12.x branch will be the last to support C++11. Future releases will -require at least C++14. - -#### Coming Soon - -* We are planning to take a dependency on - [Abseil](https://github.com/abseil/abseil-cpp). -* More documentation improvements are planned. - -## Welcome to **GoogleTest**, Google's C++ test framework! - -This repository is a merger of the formerly separate GoogleTest and GoogleMock -projects. These were so closely related that it makes sense to maintain and -release them together. - -### Getting Started - -See the [GoogleTest User's Guide](https://google.github.io/googletest/) for -documentation. We recommend starting with the -[GoogleTest Primer](https://google.github.io/googletest/primer.html). - -More information about building GoogleTest can be found at -[googletest/README.md](googletest/README.md). - -## Features - -* An [xUnit](https://en.wikipedia.org/wiki/XUnit) test framework. -* Test discovery. -* A rich set of assertions. -* User-defined assertions. -* Death tests. -* Fatal and non-fatal failures. -* Value-parameterized tests. -* Type-parameterized tests. 
-* Various options for running the tests. -* XML test report generation. - -## Supported Platforms - -GoogleTest follows Google's -[Foundational C++ Support Policy](https://opensource.google/documentation/policies/cplusplus-support). -See -[this table](https://github.com/google/oss-policies-info/blob/main/foundational-cxx-support-matrix.md) -for a list of currently supported versions compilers, platforms, and build -tools. - -## Who Is Using GoogleTest? - -In addition to many internal projects at Google, GoogleTest is also used by the -following notable projects: - -* The [Chromium projects](http://www.chromium.org/) (behind the Chrome browser - and Chrome OS). -* The [LLVM](http://llvm.org/) compiler. -* [Protocol Buffers](https://github.com/google/protobuf), Google's data - interchange format. -* The [OpenCV](http://opencv.org/) computer vision library. - -## Related Open Source Projects - -[GTest Runner](https://github.com/nholthaus/gtest-runner) is a Qt5 based -automated test-runner and Graphical User Interface with powerful features for -Windows and Linux platforms. - -[GoogleTest UI](https://github.com/ospector/gtest-gbar) is a test runner that -runs your test binary, allows you to track its progress via a progress bar, and -displays a list of test failures. Clicking on one shows failure text. GoogleTest -UI is written in C#. - -[GTest TAP Listener](https://github.com/kinow/gtest-tap-listener) is an event -listener for GoogleTest that implements the -[TAP protocol](https://en.wikipedia.org/wiki/Test_Anything_Protocol) for test -result output. If your test runner understands TAP, you may find it useful. - -[gtest-parallel](https://github.com/google/gtest-parallel) is a test runner that -runs tests from your binary in parallel to provide significant speed-up. - -[GoogleTest Adapter](https://marketplace.visualstudio.com/items?itemName=DavidSchuldenfrei.gtest-adapter) -is a VS Code extension allowing to view GoogleTest in a tree view and run/debug -your tests. - -[C++ TestMate](https://github.com/matepek/vscode-catch2-test-adapter) is a VS -Code extension allowing to view GoogleTest in a tree view and run/debug your -tests. - -[Cornichon](https://pypi.org/project/cornichon/) is a small Gherkin DSL parser -that generates stub code for GoogleTest. - -## Contributing Changes - -Please read -[`CONTRIBUTING.md`](https://github.com/google/googletest/blob/main/CONTRIBUTING.md) -for details on how to contribute to this project. - -Happy testing! diff --git a/3rdparty/googletest-1.13.0/WORKSPACE b/3rdparty/googletest-1.13.0/WORKSPACE deleted file mode 100644 index 0f10a6a9a8691036391e8acd814e500a33f5a7bf..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/WORKSPACE +++ /dev/null @@ -1,40 +0,0 @@ -workspace(name = "com_google_googletest") - -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -http_archive( - name = "com_google_absl", # 2023-01-10T21:08:25Z - sha256 = "f9a4e749f42c386a32a90fddf0e2913ed408d10c42f7f33ccf4c59ac4f0d1d05", - strip_prefix = "abseil-cpp-52835439ca90d86b27bf8cd1708296e95604d724", - urls = ["https://github.com/abseil/abseil-cpp/archive/52835439ca90d86b27bf8cd1708296e95604d724.zip"], -) - -# Note this must use a commit from the `abseil` branch of the RE2 project. 
-# https://github.com/google/re2/tree/abseil -http_archive( - name = "com_googlesource_code_re2", # 2022-12-21T14:29:10Z - sha256 = "b9ce3a51beebb38534d11d40f8928d40509b9e18a735f6a4a97ad3d014c87cb5", - strip_prefix = "re2-d0b1f8f2ecc2ea74956c7608b6f915175314ff0e", - urls = ["https://github.com/google/re2/archive/d0b1f8f2ecc2ea74956c7608b6f915175314ff0e.zip"], -) - -http_archive( - name = "rules_python", # 2023-01-10T22:00:51Z - sha256 = "5de54486a60ad8948dabe49605bb1c08053e04001a431ab3e96745b4d97a4419", - strip_prefix = "rules_python-70cce26432187a60b4e950118791385e6fb3c26f", - urls = ["https://github.com/bazelbuild/rules_python/archive/70cce26432187a60b4e950118791385e6fb3c26f.zip"], -) - -http_archive( - name = "bazel_skylib", # 2022-11-16T18:29:32Z - sha256 = "a22290c26d29d3ecca286466f7f295ac6cbe32c0a9da3a91176a90e0725e3649", - strip_prefix = "bazel-skylib-5bfcb1a684550626ce138fe0fe8f5f702b3764c3", - urls = ["https://github.com/bazelbuild/bazel-skylib/archive/5bfcb1a684550626ce138fe0fe8f5f702b3764c3.zip"], -) - -http_archive( - name = "platforms", # 2022-11-09T19:18:22Z - sha256 = "b4a3b45dc4202e2b3e34e3bc49d2b5b37295fc23ea58d88fb9e01f3642ad9b55", - strip_prefix = "platforms-3fbc687756043fb58a407c2ea8c944bc2fe1d922", - urls = ["https://github.com/bazelbuild/platforms/archive/3fbc687756043fb58a407c2ea8c944bc2fe1d922.zip"], -) diff --git a/3rdparty/googletest-1.13.0/ci/linux-presubmit.sh b/3rdparty/googletest-1.13.0/ci/linux-presubmit.sh deleted file mode 100644 index 4eb5bbe4a1dac8923fc0aa2f2d49cc38f258ec4c..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/ci/linux-presubmit.sh +++ /dev/null @@ -1,134 +0,0 @@ -#!/bin/bash -# -# Copyright 2020, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -set -euox pipefail - -readonly LINUX_LATEST_CONTAINER="gcr.io/google.com/absl-177019/linux_hybrid-latest:20220217" -readonly LINUX_GCC_FLOOR_CONTAINER="gcr.io/google.com/absl-177019/linux_gcc-floor:20220621" - -if [[ -z ${GTEST_ROOT:-} ]]; then - GTEST_ROOT="$(realpath $(dirname ${0})/..)" -fi - -if [[ -z ${STD:-} ]]; then - STD="c++14 c++17 c++20" -fi - -# Test the CMake build -for cc in /usr/local/bin/gcc /opt/llvm/clang/bin/clang; do - for cmake_off_on in OFF ON; do - time docker run \ - --volume="${GTEST_ROOT}:/src:ro" \ - --tmpfs="/build:exec" \ - --workdir="/build" \ - --rm \ - --env="CC=${cc}" \ - --env="CXX_FLAGS=\"-Werror -Wdeprecated\"" \ - ${LINUX_LATEST_CONTAINER} \ - /bin/bash -c " - cmake /src \ - -DCMAKE_CXX_STANDARD=14 \ - -Dgtest_build_samples=ON \ - -Dgtest_build_tests=ON \ - -Dgmock_build_tests=ON \ - -Dcxx_no_exception=${cmake_off_on} \ - -Dcxx_no_rtti=${cmake_off_on} && \ - make -j$(nproc) && \ - ctest -j$(nproc) --output-on-failure" - done -done - -# Do one test with an older version of GCC -time docker run \ - --volume="${GTEST_ROOT}:/src:ro" \ - --workdir="/src" \ - --rm \ - --env="CC=/usr/local/bin/gcc" \ - --env="BAZEL_CXXOPTS=-std=c++14" \ - ${LINUX_GCC_FLOOR_CONTAINER} \ - /usr/local/bin/bazel test ... \ - --copt="-Wall" \ - --copt="-Werror" \ - --copt="-Wuninitialized" \ - --copt="-Wno-error=pragmas" \ - --distdir="/bazel-distdir" \ - --features=external_include_paths \ - --keep_going \ - --show_timestamps \ - --test_output=errors - -# Test GCC -for std in ${STD}; do - for absl in 0 1; do - time docker run \ - --volume="${GTEST_ROOT}:/src:ro" \ - --workdir="/src" \ - --rm \ - --env="CC=/usr/local/bin/gcc" \ - --env="BAZEL_CXXOPTS=-std=${std}" \ - ${LINUX_LATEST_CONTAINER} \ - /usr/local/bin/bazel test ... \ - --copt="-Wall" \ - --copt="-Werror" \ - --copt="-Wuninitialized" \ - --define="absl=${absl}" \ - --distdir="/bazel-distdir" \ - --features=external_include_paths \ - --keep_going \ - --show_timestamps \ - --test_output=errors - done -done - -# Test Clang -for std in ${STD}; do - for absl in 0 1; do - time docker run \ - --volume="${GTEST_ROOT}:/src:ro" \ - --workdir="/src" \ - --rm \ - --env="CC=/opt/llvm/clang/bin/clang" \ - --env="BAZEL_CXXOPTS=-std=${std}" \ - ${LINUX_LATEST_CONTAINER} \ - /usr/local/bin/bazel test ... \ - --copt="--gcc-toolchain=/usr/local" \ - --copt="-Wall" \ - --copt="-Werror" \ - --copt="-Wuninitialized" \ - --define="absl=${absl}" \ - --distdir="/bazel-distdir" \ - --features=external_include_paths \ - --keep_going \ - --linkopt="--gcc-toolchain=/usr/local" \ - --show_timestamps \ - --test_output=errors - done -done diff --git a/3rdparty/googletest-1.13.0/ci/macos-presubmit.sh b/3rdparty/googletest-1.13.0/ci/macos-presubmit.sh deleted file mode 100644 index 8f35df58d2baa1b278f468608deb517d05150a3a..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/ci/macos-presubmit.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# -# Copyright 2020, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. 
nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -set -euox pipefail - -if [[ -z ${GTEST_ROOT:-} ]]; then - GTEST_ROOT="$(realpath $(dirname ${0})/..)" -fi - -# Test the CMake build -for cmake_off_on in OFF ON; do - BUILD_DIR=$(mktemp -d build_dir.XXXXXXXX) - cd ${BUILD_DIR} - time cmake ${GTEST_ROOT} \ - -DCMAKE_CXX_STANDARD=14 \ - -Dgtest_build_samples=ON \ - -Dgtest_build_tests=ON \ - -Dgmock_build_tests=ON \ - -Dcxx_no_exception=${cmake_off_on} \ - -Dcxx_no_rtti=${cmake_off_on} - time make - time ctest -j$(nproc) --output-on-failure -done - -# Test the Bazel build - -# If we are running on Kokoro, check for a versioned Bazel binary. -KOKORO_GFILE_BAZEL_BIN="bazel-5.1.1-darwin-x86_64" -if [[ ${KOKORO_GFILE_DIR:-} ]] && [[ -f ${KOKORO_GFILE_DIR}/${KOKORO_GFILE_BAZEL_BIN} ]]; then - BAZEL_BIN="${KOKORO_GFILE_DIR}/${KOKORO_GFILE_BAZEL_BIN}" - chmod +x ${BAZEL_BIN} -else - BAZEL_BIN="bazel" -fi - -cd ${GTEST_ROOT} -for absl in 0 1; do - ${BAZEL_BIN} test ... \ - --copt="-Wall" \ - --copt="-Werror" \ - --cxxopt="-std=c++14" \ - --define="absl=${absl}" \ - --features=external_include_paths \ - --keep_going \ - --show_timestamps \ - --test_output=errors -done diff --git a/3rdparty/googletest-1.13.0/ci/windows-presubmit.bat b/3rdparty/googletest-1.13.0/ci/windows-presubmit.bat deleted file mode 100644 index 8668ff3594b9fb31e5baf8c1f3c6de7946377b62..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/ci/windows-presubmit.bat +++ /dev/null @@ -1,56 +0,0 @@ -SETLOCAL ENABLEDELAYEDEXPANSION - -SET BAZEL_EXE=%KOKORO_GFILE_DIR%\bazel-5.1.1-windows-x86_64.exe - -SET PATH=C:\Python37;%PATH% -SET BAZEL_PYTHON=C:\python37\python.exe -SET BAZEL_SH=C:\tools\msys64\usr\bin\bash.exe -SET CMAKE_BIN="C:\Program Files\CMake\bin\cmake.exe" -SET CTEST_BIN="C:\Program Files\CMake\bin\ctest.exe" -SET CTEST_OUTPUT_ON_FAILURE=1 - -IF EXIST git\googletest ( - CD git\googletest -) ELSE IF EXIST github\googletest ( - CD github\googletest -) - -IF %errorlevel% neq 0 EXIT /B 1 - -:: ---------------------------------------------------------------------------- -:: CMake Visual Studio 15 2017 Win64 -MKDIR cmake_msvc2017 -CD cmake_msvc2017 - -%CMAKE_BIN% .. ^ - -G "Visual Studio 15 2017 Win64" ^ - -DPYTHON_EXECUTABLE:FILEPATH=c:\python37\python.exe ^ - -DPYTHON_INCLUDE_DIR:PATH=c:\python37\include ^ - -DPYTHON_LIBRARY:FILEPATH=c:\python37\lib\site-packages\pip ^ - -Dgtest_build_samples=ON ^ - -Dgtest_build_tests=ON ^ - -Dgmock_build_tests=ON -IF %errorlevel% neq 0 EXIT /B 1 - -%CMAKE_BIN% --build . 
--target ALL_BUILD --config Debug -- -maxcpucount -IF %errorlevel% neq 0 EXIT /B 1 - -%CTEST_BIN% -C Debug --timeout 600 -IF %errorlevel% neq 0 EXIT /B 1 - -CD .. -RMDIR /S /Q cmake_msvc2017 - -:: ---------------------------------------------------------------------------- -:: Bazel Visual Studio 15 2017 Win64 - -SET BAZEL_VC=C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\VC -%BAZEL_EXE% test ... ^ - --compilation_mode=dbg ^ - --copt=/std:c++14 ^ - --copt=/WX ^ - --features=external_include_paths ^ - --keep_going ^ - --test_output=errors ^ - --test_tag_filters=-no_test_msvc2017 -IF %errorlevel% neq 0 EXIT /B 1 diff --git a/3rdparty/googletest-1.13.0/docs/_config.yml b/3rdparty/googletest-1.13.0/docs/_config.yml deleted file mode 100644 index d12867eab6b6872489002b56cf5c4115388fb1aa..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/_config.yml +++ /dev/null @@ -1 +0,0 @@ -title: GoogleTest diff --git a/3rdparty/googletest-1.13.0/docs/_data/navigation.yml b/3rdparty/googletest-1.13.0/docs/_data/navigation.yml deleted file mode 100644 index 9f3332708eac165cd1fe2516f2b2cb855c5a32ef..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/_data/navigation.yml +++ /dev/null @@ -1,43 +0,0 @@ -nav: -- section: "Get Started" - items: - - title: "Supported Platforms" - url: "/platforms.html" - - title: "Quickstart: Bazel" - url: "/quickstart-bazel.html" - - title: "Quickstart: CMake" - url: "/quickstart-cmake.html" -- section: "Guides" - items: - - title: "GoogleTest Primer" - url: "/primer.html" - - title: "Advanced Topics" - url: "/advanced.html" - - title: "Mocking for Dummies" - url: "/gmock_for_dummies.html" - - title: "Mocking Cookbook" - url: "/gmock_cook_book.html" - - title: "Mocking Cheat Sheet" - url: "/gmock_cheat_sheet.html" -- section: "References" - items: - - title: "Testing Reference" - url: "/reference/testing.html" - - title: "Mocking Reference" - url: "/reference/mocking.html" - - title: "Assertions" - url: "/reference/assertions.html" - - title: "Matchers" - url: "/reference/matchers.html" - - title: "Actions" - url: "/reference/actions.html" - - title: "Testing FAQ" - url: "/faq.html" - - title: "Mocking FAQ" - url: "/gmock_faq.html" - - title: "Code Samples" - url: "/samples.html" - - title: "Using pkg-config" - url: "/pkgconfig.html" - - title: "Community Documentation" - url: "/community_created_documentation.html" diff --git a/3rdparty/googletest-1.13.0/docs/_layouts/default.html b/3rdparty/googletest-1.13.0/docs/_layouts/default.html deleted file mode 100644 index c7f331b87d7ddd4102791fd4d5b4122bfb0dd4b3..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/_layouts/default.html +++ /dev/null @@ -1,58 +0,0 @@ - - - - - - - -{% seo %} - - - - - - -
-
- {{ content }} -
- -
- - - - diff --git a/3rdparty/googletest-1.13.0/docs/_sass/main.scss b/3rdparty/googletest-1.13.0/docs/_sass/main.scss deleted file mode 100644 index 92edc877a592e877d037b769337f82568913a9d7..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/_sass/main.scss +++ /dev/null @@ -1,200 +0,0 @@ -// Styles for GoogleTest docs website on GitHub Pages. -// Color variables are defined in -// https://github.com/pages-themes/primer/tree/master/_sass/primer-support/lib/variables - -$sidebar-width: 260px; - -body { - display: flex; - margin: 0; -} - -.sidebar { - background: $black; - color: $text-white; - flex-shrink: 0; - height: 100vh; - overflow: auto; - position: sticky; - top: 0; - width: $sidebar-width; -} - -.sidebar h1 { - font-size: 1.5em; -} - -.sidebar h2 { - color: $gray-light; - font-size: 0.8em; - font-weight: normal; - margin-bottom: 0.8em; - padding-left: 2.5em; - text-transform: uppercase; -} - -.sidebar .header { - background: $black; - padding: 2em; - position: sticky; - top: 0; - width: 100%; -} - -.sidebar .header a { - color: $text-white; - text-decoration: none; -} - -.sidebar .nav-toggle { - display: none; -} - -.sidebar .expander { - cursor: pointer; - display: none; - height: 3em; - position: absolute; - right: 1em; - top: 1.5em; - width: 3em; -} - -.sidebar .expander .arrow { - border: solid $white; - border-width: 0 3px 3px 0; - display: block; - height: 0.7em; - margin: 1em auto; - transform: rotate(45deg); - transition: transform 0.5s; - width: 0.7em; -} - -.sidebar nav { - width: 100%; -} - -.sidebar nav ul { - list-style-type: none; - margin-bottom: 1em; - padding: 0; - - &:last-child { - margin-bottom: 2em; - } - - a { - text-decoration: none; - } - - li { - color: $text-white; - padding-left: 2em; - text-decoration: none; - } - - li.active { - background: $border-gray-darker; - font-weight: bold; - } - - li:hover { - background: $border-gray-darker; - } -} - -.main { - background-color: $bg-gray; - width: calc(100% - #{$sidebar-width}); -} - -.main .main-inner { - background-color: $white; - padding: 2em; -} - -.main .footer { - margin: 0; - padding: 2em; -} - -.main table th { - text-align: left; -} - -.main .callout { - border-left: 0.25em solid $white; - padding: 1em; - - a { - text-decoration: underline; - } - - &.important { - background-color: $bg-yellow-light; - border-color: $bg-yellow; - color: $black; - } - - &.note { - background-color: $bg-blue-light; - border-color: $text-blue; - color: $text-blue; - } - - &.tip { - background-color: $green-000; - border-color: $green-700; - color: $green-700; - } - - &.warning { - background-color: $red-000; - border-color: $text-red; - color: $text-red; - } -} - -.main .good pre { - background-color: $bg-green-light; -} - -.main .bad pre { - background-color: $red-000; -} - -@media all and (max-width: 768px) { - body { - flex-direction: column; - } - - .sidebar { - height: auto; - position: relative; - width: 100%; - } - - .sidebar .expander { - display: block; - } - - .sidebar nav { - height: 0; - overflow: hidden; - } - - .sidebar .nav-toggle:checked { - & ~ nav { - height: auto; - } - - & + .expander .arrow { - transform: rotate(-135deg); - } - } - - .main { - width: 100%; - } -} diff --git a/3rdparty/googletest-1.13.0/docs/advanced.md b/3rdparty/googletest-1.13.0/docs/advanced.md deleted file mode 100644 index f16382fe04fcd6fe91edb48e7e2748d3963a45e4..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/advanced.md +++ /dev/null @@ -1,2407 +0,0 @@ -# Advanced 
googletest Topics - -## Introduction - -Now that you have read the [googletest Primer](primer.md) and learned how to -write tests using googletest, it's time to learn some new tricks. This document -will show you more assertions as well as how to construct complex failure -messages, propagate fatal failures, reuse and speed up your test fixtures, and -use various flags with your tests. - -## More Assertions - -This section covers some less frequently used, but still significant, -assertions. - -### Explicit Success and Failure - -See [Explicit Success and Failure](reference/assertions.md#success-failure) in -the Assertions Reference. - -### Exception Assertions - -See [Exception Assertions](reference/assertions.md#exceptions) in the Assertions -Reference. - -### Predicate Assertions for Better Error Messages - -Even though googletest has a rich set of assertions, they can never be complete, -as it's impossible (nor a good idea) to anticipate all scenarios a user might -run into. Therefore, sometimes a user has to use `EXPECT_TRUE()` to check a -complex expression, for lack of a better macro. This has the problem of not -showing you the values of the parts of the expression, making it hard to -understand what went wrong. As a workaround, some users choose to construct the -failure message by themselves, streaming it into `EXPECT_TRUE()`. However, this -is awkward especially when the expression has side-effects or is expensive to -evaluate. - -googletest gives you three different options to solve this problem: - -#### Using an Existing Boolean Function - -If you already have a function or functor that returns `bool` (or a type that -can be implicitly converted to `bool`), you can use it in a *predicate -assertion* to get the function arguments printed for free. See -[`EXPECT_PRED*`](reference/assertions.md#EXPECT_PRED) in the Assertions -Reference for details. - -#### Using a Function That Returns an AssertionResult - -While `EXPECT_PRED*()` and friends are handy for a quick job, the syntax is not -satisfactory: you have to use different macros for different arities, and it -feels more like Lisp than C++. The `::testing::AssertionResult` class solves -this problem. - -An `AssertionResult` object represents the result of an assertion (whether it's -a success or a failure, and an associated message). You can create an -`AssertionResult` using one of these factory functions: - -```c++ -namespace testing { - -// Returns an AssertionResult object to indicate that an assertion has -// succeeded. -AssertionResult AssertionSuccess(); - -// Returns an AssertionResult object to indicate that an assertion has -// failed. -AssertionResult AssertionFailure(); - -} -``` - -You can then use the `<<` operator to stream messages to the `AssertionResult` -object. - -To provide more readable messages in Boolean assertions (e.g. `EXPECT_TRUE()`), -write a predicate function that returns `AssertionResult` instead of `bool`. 
For -example, if you define `IsEven()` as: - -```c++ -testing::AssertionResult IsEven(int n) { - if ((n % 2) == 0) - return testing::AssertionSuccess(); - else - return testing::AssertionFailure() << n << " is odd"; -} -``` - -instead of: - -```c++ -bool IsEven(int n) { - return (n % 2) == 0; -} -``` - -the failed assertion `EXPECT_TRUE(IsEven(Fib(4)))` will print: - -```none -Value of: IsEven(Fib(4)) - Actual: false (3 is odd) -Expected: true -``` - -instead of a more opaque - -```none -Value of: IsEven(Fib(4)) - Actual: false -Expected: true -``` - -If you want informative messages in `EXPECT_FALSE` and `ASSERT_FALSE` as well -(one third of Boolean assertions in the Google code base are negative ones), and -are fine with making the predicate slower in the success case, you can supply a -success message: - -```c++ -testing::AssertionResult IsEven(int n) { - if ((n % 2) == 0) - return testing::AssertionSuccess() << n << " is even"; - else - return testing::AssertionFailure() << n << " is odd"; -} -``` - -Then the statement `EXPECT_FALSE(IsEven(Fib(6)))` will print - -```none - Value of: IsEven(Fib(6)) - Actual: true (8 is even) - Expected: false -``` - -#### Using a Predicate-Formatter - -If you find the default message generated by -[`EXPECT_PRED*`](reference/assertions.md#EXPECT_PRED) and -[`EXPECT_TRUE`](reference/assertions.md#EXPECT_TRUE) unsatisfactory, or some -arguments to your predicate do not support streaming to `ostream`, you can -instead use *predicate-formatter assertions* to *fully* customize how the -message is formatted. See -[`EXPECT_PRED_FORMAT*`](reference/assertions.md#EXPECT_PRED_FORMAT) in the -Assertions Reference for details. - -### Floating-Point Comparison - -See [Floating-Point Comparison](reference/assertions.md#floating-point) in the -Assertions Reference. - -#### Floating-Point Predicate-Format Functions - -Some floating-point operations are useful, but not that often used. In order to -avoid an explosion of new macros, we provide them as predicate-format functions -that can be used in the predicate assertion macro -[`EXPECT_PRED_FORMAT2`](reference/assertions.md#EXPECT_PRED_FORMAT), for -example: - -```c++ -using ::testing::FloatLE; -using ::testing::DoubleLE; -... -EXPECT_PRED_FORMAT2(FloatLE, val1, val2); -EXPECT_PRED_FORMAT2(DoubleLE, val1, val2); -``` - -The above code verifies that `val1` is less than, or approximately equal to, -`val2`. - -### Asserting Using gMock Matchers - -See [`EXPECT_THAT`](reference/assertions.md#EXPECT_THAT) in the Assertions -Reference. - -### More String Assertions - -(Please read the [previous](#asserting-using-gmock-matchers) section first if -you haven't.) - -You can use the gMock [string matchers](reference/matchers.md#string-matchers) -with [`EXPECT_THAT`](reference/assertions.md#EXPECT_THAT) to do more string -comparison tricks (sub-string, prefix, suffix, regular expression, and etc). For -example, - -```c++ -using ::testing::HasSubstr; -using ::testing::MatchesRegex; -... - ASSERT_THAT(foo_string, HasSubstr("needle")); - EXPECT_THAT(bar_string, MatchesRegex("\\w*\\d+")); -``` - -### Windows HRESULT assertions - -See [Windows HRESULT Assertions](reference/assertions.md#HRESULT) in the -Assertions Reference. - -### Type Assertions - -You can call the function - -```c++ -::testing::StaticAssertTypeEq<T1, T2>(); -``` - -to assert that types `T1` and `T2` are the same. The function does nothing if -the assertion is satisfied.
If the types are different, the function call will -fail to compile, the compiler error message will say that `T1 and T2 are not the -same type` and most likely (depending on the compiler) show you the actual -values of `T1` and `T2`. This is mainly useful inside template code. - -**Caveat**: When used inside a member function of a class template or a function -template, `StaticAssertTypeEq<T1, T2>()` is effective only if the function is -instantiated. For example, given: - -```c++ -template <typename T> class Foo { - public: - void Bar() { testing::StaticAssertTypeEq<int, T>(); } -}; -``` - -the code: - -```c++ -void Test1() { Foo<bool> foo; } -``` - -will not generate a compiler error, as `Foo<bool>::Bar()` is never actually -instantiated. Instead, you need: - -```c++ -void Test2() { Foo<bool> foo; foo.Bar(); } -``` - -to cause a compiler error. - -### Assertion Placement - -You can use assertions in any C++ function. In particular, it doesn't have to be -a method of the test fixture class. The one constraint is that assertions that -generate a fatal failure (`FAIL*` and `ASSERT_*`) can only be used in -void-returning functions. This is a consequence of Google's not using -exceptions. By placing it in a non-void function you'll get a confusing compile -error like `"error: void value not ignored as it ought to be"` or `"cannot -initialize return object of type 'bool' with an rvalue of type 'void'"` or -`"error: no viable conversion from 'void' to 'string'"`. - -If you need to use fatal assertions in a function that returns non-void, one -option is to make the function return the value in an out parameter instead. For -example, you can rewrite `T2 Foo(T1 x)` to `void Foo(T1 x, T2* result)`. You -need to make sure that `*result` contains some sensible value even when the -function returns prematurely. As the function now returns `void`, you can use -any assertion inside of it. - -If changing the function's type is not an option, you should just use assertions -that generate non-fatal failures, such as `ADD_FAILURE*` and `EXPECT_*`. - -{: .callout .note} -NOTE: Constructors and destructors are not considered void-returning functions, -according to the C++ language specification, and so you may not use fatal -assertions in them; you'll get a compilation error if you try. Instead, either -call `abort` and crash the entire test executable, or put the fatal assertion in -a `SetUp`/`TearDown` function; see -[constructor/destructor vs. `SetUp`/`TearDown`](faq.md#CtorVsSetUp) - -{: .callout .warning} -WARNING: A fatal assertion in a helper function (private void-returning method) -called from a constructor or destructor does not terminate the current test, as -your intuition might suggest: it merely returns from the constructor or -destructor early, possibly leaving your object in a partially-constructed or -partially-destructed state! You almost certainly want to `abort` or use -`SetUp`/`TearDown` instead. - -## Skipping test execution - -Related to the assertions `SUCCEED()` and `FAIL()`, you can prevent further test -execution at runtime with the `GTEST_SKIP()` macro. This is useful when you need -to check for preconditions of the system under test during runtime and skip -tests in a meaningful way. - -`GTEST_SKIP()` can be used in individual test cases or in the `SetUp()` methods -of classes derived from either `::testing::Environment` or `::testing::Test`.
-For example: - -```c++ -TEST(SkipTest, DoesSkip) { - GTEST_SKIP() << "Skipping single test"; - EXPECT_EQ(0, 1);  // Won't fail; it won't be executed -} - -class SkipFixture : public ::testing::Test { - protected: - void SetUp() override { - GTEST_SKIP() << "Skipping all tests for this fixture"; - } -}; - -// Tests for SkipFixture won't be executed. -TEST_F(SkipFixture, SkipsOneTest) { - EXPECT_EQ(5, 7);  // Won't fail -} -``` - -As with assertion macros, you can stream a custom message into `GTEST_SKIP()`. - -## Teaching googletest How to Print Your Values - -When a test assertion such as `EXPECT_EQ` fails, googletest prints the argument -values to help you debug. It does this using a user-extensible value printer. - -This printer knows how to print built-in C++ types, native arrays, STL -containers, and any type that supports the `<<` operator. For other types, it -prints the raw bytes in the value and hopes that you the user can figure it out. - -As mentioned earlier, the printer is *extensible*. That means you can teach it -to do a better job at printing your particular type than to dump the bytes. To -do that, define `<<` for your type: - -```c++ -#include <ostream> - -namespace foo { - -class Bar {  // We want googletest to be able to print instances of this. -... - // Create a free inline friend function. - friend std::ostream& operator<<(std::ostream& os, const Bar& bar) { - return os << bar.DebugString();  // whatever needed to print bar to os - } -}; - -// If you can't declare the function in the class it's important that the -// << operator is defined in the SAME namespace that defines Bar.  C++'s look-up -// rules rely on that. -std::ostream& operator<<(std::ostream& os, const Bar& bar) { - return os << bar.DebugString();  // whatever needed to print bar to os -} - -} // namespace foo -``` - -Sometimes, this might not be an option: your team may consider it bad style to -have a `<<` operator for `Bar`, or `Bar` may already have a `<<` operator that -doesn't do what you want (and you cannot change it). If so, you can instead -define a `PrintTo()` function like this: - -```c++ -#include <ostream> - -namespace foo { - -class Bar { - ... - friend void PrintTo(const Bar& bar, std::ostream* os) { - *os << bar.DebugString();  // whatever needed to print bar to os - } -}; - -// If you can't declare the function in the class it's important that PrintTo() -// is defined in the SAME namespace that defines Bar.  C++'s look-up rules rely -// on that. -void PrintTo(const Bar& bar, std::ostream* os) { - *os << bar.DebugString();  // whatever needed to print bar to os -} - -} // namespace foo -``` - -If you have defined both `<<` and `PrintTo()`, the latter will be used when -googletest is concerned. This allows you to customize how the value appears in -googletest's output without affecting code that relies on the behavior of its -`<<` operator. - -If you want to print a value `x` using googletest's value printer yourself, just -call `::testing::PrintToString(x)`, which returns an `std::string`: - -```c++ -vector<pair<Bar, int> > bar_ints = GetBarIntVector(); - -EXPECT_TRUE(IsCorrectBarIntVector(bar_ints)) - << "bar_ints = " << testing::PrintToString(bar_ints); -``` - -## Death Tests - -In many applications, there are assertions that can cause application failure if -a condition is not met. These consistency checks, which ensure that the program -is in a known good state, are there to fail at the earliest possible time after -some program state is corrupted.
If the assertion checks the wrong condition, -then the program may proceed in an erroneous state, which could lead to memory -corruption, security holes, or worse. Hence it is vitally important to test that -such assertion statements work as expected. - -Since these precondition checks cause the processes to die, we call such tests -_death tests_. More generally, any test that checks that a program terminates -(except by throwing an exception) in an expected fashion is also a death test. - -Note that if a piece of code throws an exception, we don't consider it "death" -for the purpose of death tests, as the caller of the code could catch the -exception and avoid the crash. If you want to verify exceptions thrown by your -code, see [Exception Assertions](#ExceptionAssertions). - -If you want to test `EXPECT_*()/ASSERT_*()` failures in your test code, see -["Catching" Failures](#catching-failures). - -### How to Write a Death Test - -GoogleTest provides assertion macros to support death tests. See -[Death Assertions](reference/assertions.md#death) in the Assertions Reference -for details. - -To write a death test, simply use one of the macros inside your test function. -For example, - -```c++ -TEST(MyDeathTest, Foo) { - // This death test uses a compound statement. - ASSERT_DEATH({ - int n = 5; - Foo(&n); - }, "Error on line .* of Foo()"); -} - -TEST(MyDeathTest, NormalExit) { - EXPECT_EXIT(NormalExit(), testing::ExitedWithCode(0), "Success"); -} - -TEST(MyDeathTest, KillProcess) { - EXPECT_EXIT(KillProcess(), testing::KilledBySignal(SIGKILL), - "Sending myself unblockable signal"); -} -``` - -verifies that: - -* calling `Foo(5)` causes the process to die with the given error message, -* calling `NormalExit()` causes the process to print `"Success"` to stderr and - exit with exit code 0, and -* calling `KillProcess()` kills the process with signal `SIGKILL`. - -The test function body may contain other assertions and statements as well, if -necessary. - -Note that a death test only cares about three things: - -1. does `statement` abort or exit the process? -2. (in the case of `ASSERT_EXIT` and `EXPECT_EXIT`) does the exit status - satisfy `predicate`? Or (in the case of `ASSERT_DEATH` and `EXPECT_DEATH`) - is the exit status non-zero? And -3. does the stderr output match `matcher`? - -In particular, if `statement` generates an `ASSERT_*` or `EXPECT_*` failure, it -will **not** cause the death test to fail, as googletest assertions don't abort -the process. - -### Death Test Naming - -{: .callout .important} -IMPORTANT: We strongly recommend you to follow the convention of naming your -**test suite** (not test) `*DeathTest` when it contains a death test, as -demonstrated in the above example. The -[Death Tests And Threads](#death-tests-and-threads) section below explains why. - -If a test fixture class is shared by normal tests and death tests, you can use -`using` or `typedef` to introduce an alias for the fixture class and avoid -duplicating its code: - -```c++ -class FooTest : public testing::Test { ... }; - -using FooDeathTest = FooTest; - -TEST_F(FooTest, DoesThis) { - // normal test -} - -TEST_F(FooDeathTest, DoesThat) { - // death test -} -``` - -### Regular Expression Syntax - -When built with Bazel and using Abseil, googletest uses the -[RE2](https://github.com/google/re2/wiki/Syntax) syntax. 
Otherwise, for POSIX -systems (Linux, Cygwin, Mac), googletest uses the -[POSIX extended regular expression](http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html#tag_09_04) -syntax. To learn about POSIX syntax, you may want to read this -[Wikipedia entry](http://en.wikipedia.org/wiki/Regular_expression#POSIX_extended). - -On Windows, googletest uses its own simple regular expression implementation. It -lacks many features. For example, we don't support union (`"x|y"`), grouping -(`"(xy)"`), brackets (`"[xy]"`), and repetition count (`"x{5,7}"`), among -others. Below is what we do support (`A` denotes a literal character, period -(`.`), or a single `\\ ` escape sequence; `x` and `y` denote regular -expressions.): - -Expression | Meaning ----------- | -------------------------------------------------------------- -`c` | matches any literal character `c` -`\\d` | matches any decimal digit -`\\D` | matches any character that's not a decimal digit -`\\f` | matches `\f` -`\\n` | matches `\n` -`\\r` | matches `\r` -`\\s` | matches any ASCII whitespace, including `\n` -`\\S` | matches any character that's not a whitespace -`\\t` | matches `\t` -`\\v` | matches `\v` -`\\w` | matches any letter, `_`, or decimal digit -`\\W` | matches any character that `\\w` doesn't match -`\\c` | matches any literal character `c`, which must be a punctuation -`.` | matches any single character except `\n` -`A?` | matches 0 or 1 occurrences of `A` -`A*` | matches 0 or many occurrences of `A` -`A+` | matches 1 or many occurrences of `A` -`^` | matches the beginning of a string (not that of each line) -`$` | matches the end of a string (not that of each line) -`xy` | matches `x` followed by `y` - -To help you determine which capability is available on your system, googletest -defines macros to govern which regular expression it is using. The macros are: -`GTEST_USES_SIMPLE_RE=1` or `GTEST_USES_POSIX_RE=1`. If you want your death -tests to work in all cases, you can either `#if` on these macros or use the more -limited syntax only. - -### How It Works - -See [Death Assertions](reference/assertions.md#death) in the Assertions -Reference. - -### Death Tests And Threads - -The reason for the two death test styles has to do with thread safety. Due to -well-known problems with forking in the presence of threads, death tests should -be run in a single-threaded context. Sometimes, however, it isn't feasible to -arrange that kind of environment. For example, statically-initialized modules -may start threads before main is ever reached. Once threads have been created, -it may be difficult or impossible to clean them up. - -googletest has three features intended to raise awareness of threading issues. - -1. A warning is emitted if multiple threads are running when a death test is - encountered. -2. Test suites with a name ending in "DeathTest" are run before all other - tests. -3. It uses `clone()` instead of `fork()` to spawn the child process on Linux - (`clone()` is not available on Cygwin and Mac), as `fork()` is more likely - to cause the child to hang when the parent process has multiple threads. - -It's perfectly fine to create threads inside a death test statement; they are -executed in a separate process and cannot affect the parent. - -### Death Test Styles - -The "threadsafe" death test style was introduced in order to help mitigate the -risks of testing in a possibly multithreaded environment. It trades increased -test execution time (potentially dramatically so) for improved thread safety. 
- -The automated testing framework does not set the style flag. You can choose a -particular style of death tests by setting the flag programmatically: - -```c++ -GTEST_FLAG_SET(death_test_style, "threadsafe") -``` - -You can do this in `main()` to set the style for all death tests in the binary, -or in individual tests. Recall that flags are saved before running each test and -restored afterwards, so you need not do that yourself. For example: - -```c++ -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - GTEST_FLAG_SET(death_test_style, "fast"); - return RUN_ALL_TESTS(); -} - -TEST(MyDeathTest, TestOne) { - GTEST_FLAG_SET(death_test_style, "threadsafe"); - // This test is run in the "threadsafe" style: - ASSERT_DEATH(ThisShouldDie(), ""); -} - -TEST(MyDeathTest, TestTwo) { - // This test is run in the "fast" style: - ASSERT_DEATH(ThisShouldDie(), ""); -} -``` - -### Caveats - -The `statement` argument of `ASSERT_EXIT()` can be any valid C++ statement. If -it leaves the current function via a `return` statement or by throwing an -exception, the death test is considered to have failed. Some googletest macros -may return from the current function (e.g. `ASSERT_TRUE()`), so be sure to avoid -them in `statement`. - -Since `statement` runs in the child process, any in-memory side effect (e.g. -modifying a variable, releasing memory, etc) it causes will *not* be observable -in the parent process. In particular, if you release memory in a death test, -your program will fail the heap check as the parent process will never see the -memory reclaimed. To solve this problem, you can - -1. try not to free memory in a death test; -2. free the memory again in the parent process; or -3. do not use the heap checker in your program. - -Due to an implementation detail, you cannot place multiple death test assertions -on the same line; otherwise, compilation will fail with an unobvious error -message. - -Despite the improved thread safety afforded by the "threadsafe" style of death -test, thread problems such as deadlock are still possible in the presence of -handlers registered with `pthread_atfork(3)`. - -## Using Assertions in Sub-routines - -{: .callout .note} -Note: If you want to put a series of test assertions in a subroutine to check -for a complex condition, consider using -[a custom GMock matcher](gmock_cook_book.md#NewMatchers) instead. This lets you -provide a more readable error message in case of failure and avoid all of the -issues described below. - -### Adding Traces to Assertions - -If a test sub-routine is called from several places, when an assertion inside it -fails, it can be hard to tell which invocation of the sub-routine the failure is -from. You can alleviate this problem using extra logging or custom failure -messages, but that usually clutters up your tests. A better solution is to use -the `SCOPED_TRACE` macro or the `ScopedTrace` utility: - -```c++ -SCOPED_TRACE(message); -``` - -```c++ -ScopedTrace trace("file_path", line_number, message); -``` - -where `message` can be anything streamable to `std::ostream`. `SCOPED_TRACE` -macro will cause the current file name, line number, and the given message to be -added in every failure message. `ScopedTrace` accepts explicit file name and -line number in arguments, which is useful for writing test helpers. The effect -will be undone when the control leaves the current lexical scope. 
- -For example, - -```c++ -10: void Sub1(int n) { -11: EXPECT_EQ(Bar(n), 1); -12: EXPECT_EQ(Bar(n + 1), 2); -13: } -14: -15: TEST(FooTest, Bar) { -16: { -17: SCOPED_TRACE("A"); // This trace point will be included in -18: // every failure in this scope. -19: Sub1(1); -20: } -21: // Now it won't. -22: Sub1(9); -23: } -``` - -could result in messages like these: - -```none -path/to/foo_test.cc:11: Failure -Value of: Bar(n) -Expected: 1 - Actual: 2 -Google Test trace: -path/to/foo_test.cc:17: A - -path/to/foo_test.cc:12: Failure -Value of: Bar(n + 1) -Expected: 2 - Actual: 3 -``` - -Without the trace, it would've been difficult to know which invocation of -`Sub1()` the two failures come from respectively. (You could add an extra -message to each assertion in `Sub1()` to indicate the value of `n`, but that's -tedious.) - -Some tips on using `SCOPED_TRACE`: - -1. With a suitable message, it's often enough to use `SCOPED_TRACE` at the - beginning of a sub-routine, instead of at each call site. -2. When calling sub-routines inside a loop, make the loop iterator part of the - message in `SCOPED_TRACE` such that you can know which iteration the failure - is from. -3. Sometimes the line number of the trace point is enough for identifying the - particular invocation of a sub-routine. In this case, you don't have to - choose a unique message for `SCOPED_TRACE`. You can simply use `""`. -4. You can use `SCOPED_TRACE` in an inner scope when there is one in the outer - scope. In this case, all active trace points will be included in the failure - messages, in reverse order they are encountered. -5. The trace dump is clickable in Emacs - hit `return` on a line number and - you'll be taken to that line in the source file! - -### Propagating Fatal Failures - -A common pitfall when using `ASSERT_*` and `FAIL*` is not understanding that -when they fail they only abort the _current function_, not the entire test. For -example, the following test will segfault: - -```c++ -void Subroutine() { - // Generates a fatal failure and aborts the current function. - ASSERT_EQ(1, 2); - - // The following won't be executed. - ... -} - -TEST(FooTest, Bar) { - Subroutine(); // The intended behavior is for the fatal failure - // in Subroutine() to abort the entire test. - - // The actual behavior: the function goes on after Subroutine() returns. - int* p = nullptr; - *p = 3; // Segfault! -} -``` - -To alleviate this, googletest provides three different solutions. You could use -either exceptions, the `(ASSERT|EXPECT)_NO_FATAL_FAILURE` assertions or the -`HasFatalFailure()` function. They are described in the following two -subsections. - -#### Asserting on Subroutines with an exception - -The following code can turn ASSERT-failure into an exception: - -```c++ -class ThrowListener : public testing::EmptyTestEventListener { - void OnTestPartResult(const testing::TestPartResult& result) override { - if (result.type() == testing::TestPartResult::kFatalFailure) { - throw testing::AssertionException(result); - } - } -}; -int main(int argc, char** argv) { - ... - testing::UnitTest::GetInstance()->listeners().Append(new ThrowListener); - return RUN_ALL_TESTS(); -} -``` - -This listener should be added after other listeners if you have any, otherwise -they won't see failed `OnTestPartResult`. - -#### Asserting on Subroutines - -As shown above, if your test calls a subroutine that has an `ASSERT_*` failure -in it, the test will continue after the subroutine returns. This may not be what -you want. 
- -Often people want fatal failures to propagate like exceptions. For that -googletest offers the following macros: - -Fatal assertion | Nonfatal assertion | Verifies -------------------------------------- | ------------------------------------- | -------- -`ASSERT_NO_FATAL_FAILURE(statement);` | `EXPECT_NO_FATAL_FAILURE(statement);` | `statement` doesn't generate any new fatal failures in the current thread. - -Only failures in the thread that executes the assertion are checked to determine -the result of this type of assertions. If `statement` creates new threads, -failures in these threads are ignored. - -Examples: - -```c++ -ASSERT_NO_FATAL_FAILURE(Foo()); - -int i; -EXPECT_NO_FATAL_FAILURE({ - i = Bar(); -}); -``` - -Assertions from multiple threads are currently not supported on Windows. - -#### Checking for Failures in the Current Test - -`HasFatalFailure()` in the `::testing::Test` class returns `true` if an -assertion in the current test has suffered a fatal failure. This allows -functions to catch fatal failures in a sub-routine and return early. - -```c++ -class Test { - public: - ... - static bool HasFatalFailure(); -}; -``` - -The typical usage, which basically simulates the behavior of a thrown exception, -is: - -```c++ -TEST(FooTest, Bar) { - Subroutine(); - // Aborts if Subroutine() had a fatal failure. - if (HasFatalFailure()) return; - - // The following won't be executed. - ... -} -``` - -If `HasFatalFailure()` is used outside of `TEST()` , `TEST_F()` , or a test -fixture, you must add the `::testing::Test::` prefix, as in: - -```c++ -if (testing::Test::HasFatalFailure()) return; -``` - -Similarly, `HasNonfatalFailure()` returns `true` if the current test has at -least one non-fatal failure, and `HasFailure()` returns `true` if the current -test has at least one failure of either kind. - -## Logging Additional Information - -In your test code, you can call `RecordProperty("key", value)` to log additional -information, where `value` can be either a string or an `int`. The *last* value -recorded for a key will be emitted to the -[XML output](#generating-an-xml-report) if you specify one. For example, the -test - -```c++ -TEST_F(WidgetUsageTest, MinAndMaxWidgets) { - RecordProperty("MaximumWidgets", ComputeMaxUsage()); - RecordProperty("MinimumWidgets", ComputeMinUsage()); -} -``` - -will output XML like this: - -```xml - ... - - ... -``` - -{: .callout .note} -> NOTE: -> -> * `RecordProperty()` is a static member of the `Test` class. Therefore it -> needs to be prefixed with `::testing::Test::` if used outside of the -> `TEST` body and the test fixture class. -> * *`key`* must be a valid XML attribute name, and cannot conflict with the -> ones already used by googletest (`name`, `status`, `time`, `classname`, -> `type_param`, and `value_param`). -> * Calling `RecordProperty()` outside of the lifespan of a test is allowed. -> If it's called outside of a test but between a test suite's -> `SetUpTestSuite()` and `TearDownTestSuite()` methods, it will be -> attributed to the XML element for the test suite. If it's called outside -> of all test suites (e.g. in a test environment), it will be attributed to -> the top-level XML element. - -## Sharing Resources Between Tests in the Same Test Suite - -googletest creates a new test fixture object for each test in order to make -tests independent and easier to debug. However, sometimes tests use resources -that are expensive to set up, making the one-copy-per-test model prohibitively -expensive. 
- -If the tests don't change the resource, there's no harm in their sharing a -single resource copy. So, in addition to per-test set-up/tear-down, googletest -also supports per-test-suite set-up/tear-down. To use it: - -1. In your test fixture class (say `FooTest` ), declare as `static` some member - variables to hold the shared resources. -2. Outside your test fixture class (typically just below it), define those - member variables, optionally giving them initial values. -3. In the same test fixture class, define a `static void SetUpTestSuite()` - function (remember not to spell it as **`SetupTestSuite`** with a small - `u`!) to set up the shared resources and a `static void TearDownTestSuite()` - function to tear them down. - -That's it! googletest automatically calls `SetUpTestSuite()` before running the -*first test* in the `FooTest` test suite (i.e. before creating the first -`FooTest` object), and calls `TearDownTestSuite()` after running the *last test* -in it (i.e. after deleting the last `FooTest` object). In between, the tests can -use the shared resources. - -Remember that the test order is undefined, so your code can't depend on a test -preceding or following another. Also, the tests must either not modify the state -of any shared resource, or, if they do modify the state, they must restore the -state to its original value before passing control to the next test. - -Note that `SetUpTestSuite()` may be called multiple times for a test fixture -class that has derived classes, so you should not expect code in the function -body to be run only once. Also, derived classes still have access to shared -resources defined as static members, so careful consideration is needed when -managing shared resources to avoid memory leaks. - -Here's an example of per-test-suite set-up and tear-down: - -```c++ -class FooTest : public testing::Test { - protected: - // Per-test-suite set-up. - // Called before the first test in this test suite. - // Can be omitted if not needed. - static void SetUpTestSuite() { - // Avoid reallocating static objects if called in subclasses of FooTest. - if (shared_resource_ == nullptr) { - shared_resource_ = new ...; - } - } - - // Per-test-suite tear-down. - // Called after the last test in this test suite. - // Can be omitted if not needed. - static void TearDownTestSuite() { - delete shared_resource_; - shared_resource_ = nullptr; - } - - // You can define per-test set-up logic as usual. - void SetUp() override { ... } - - // You can define per-test tear-down logic as usual. - void TearDown() override { ... } - - // Some expensive resource shared by all tests. - static T* shared_resource_; -}; - -T* FooTest::shared_resource_ = nullptr; - -TEST_F(FooTest, Test1) { - ... you can refer to shared_resource_ here ... -} - -TEST_F(FooTest, Test2) { - ... you can refer to shared_resource_ here ... -} -``` - -{: .callout .note} -NOTE: Though the above code declares `SetUpTestSuite()` protected, it may -sometimes be necessary to declare it public, such as when using it with -`TEST_P`. - -## Global Set-Up and Tear-Down - -Just as you can do set-up and tear-down at the test level and the test suite -level, you can also do it at the test program level. Here's how. - -First, you subclass the `::testing::Environment` class to define a test -environment, which knows how to set-up and tear-down: - -```c++ -class Environment : public ::testing::Environment { - public: - ~Environment() override {} - - // Override this to define how to set up the environment. 
- void SetUp() override {} - - // Override this to define how to tear down the environment. - void TearDown() override {} -}; -``` - -Then, you register an instance of your environment class with googletest by -calling the `::testing::AddGlobalTestEnvironment()` function: - -```c++ -Environment* AddGlobalTestEnvironment(Environment* env); -``` - -Now, when `RUN_ALL_TESTS()` is called, it first calls the `SetUp()` method of -each environment object, then runs the tests if none of the environments -reported fatal failures and `GTEST_SKIP()` was not called. `RUN_ALL_TESTS()` -always calls `TearDown()` with each environment object, regardless of whether or -not the tests were run. - -It's OK to register multiple environment objects. In this suite, their `SetUp()` -will be called in the order they are registered, and their `TearDown()` will be -called in the reverse order. - -Note that googletest takes ownership of the registered environment objects. -Therefore **do not delete them** by yourself. - -You should call `AddGlobalTestEnvironment()` before `RUN_ALL_TESTS()` is called, -probably in `main()`. If you use `gtest_main`, you need to call this before -`main()` starts for it to take effect. One way to do this is to define a global -variable like this: - -```c++ -testing::Environment* const foo_env = - testing::AddGlobalTestEnvironment(new FooEnvironment); -``` - -However, we strongly recommend you to write your own `main()` and call -`AddGlobalTestEnvironment()` there, as relying on initialization of global -variables makes the code harder to read and may cause problems when you register -multiple environments from different translation units and the environments have -dependencies among them (remember that the compiler doesn't guarantee the order -in which global variables from different translation units are initialized). - -## Value-Parameterized Tests - -*Value-parameterized tests* allow you to test your code with different -parameters without writing multiple copies of the same test. This is useful in a -number of situations, for example: - -* You have a piece of code whose behavior is affected by one or more - command-line flags. You want to make sure your code performs correctly for - various values of those flags. -* You want to test different implementations of an OO interface. -* You want to test your code over various inputs (a.k.a. data-driven testing). - This feature is easy to abuse, so please exercise your good sense when doing - it! - -### How to Write Value-Parameterized Tests - -To write value-parameterized tests, first you should define a fixture class. It -must be derived from both `testing::Test` and `testing::WithParamInterface` -(the latter is a pure interface), where `T` is the type of your parameter -values. For convenience, you can just derive the fixture class from -`testing::TestWithParam`, which itself is derived from both `testing::Test` -and `testing::WithParamInterface`. `T` can be any copyable type. If it's a -raw pointer, you are responsible for managing the lifespan of the pointed -values. - -{: .callout .note} -NOTE: If your test fixture defines `SetUpTestSuite()` or `TearDownTestSuite()` -they must be declared **public** rather than **protected** in order to use -`TEST_P`. - -```c++ -class FooTest : - public testing::TestWithParam { - // You can implement all the usual fixture class members here. - // To access the test parameter, call GetParam() from class - // TestWithParam. 
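-  // (Here the test parameter type would be const char*, matching the string
-  // values passed to INSTANTIATE_TEST_SUITE_P further below.)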
-}; - -// Or, when you want to add parameters to a pre-existing fixture class: -class BaseTest : public testing::Test { - ... -}; -class BarTest : public BaseTest, - public testing::WithParamInterface { - ... -}; -``` - -Then, use the `TEST_P` macro to define as many test patterns using this fixture -as you want. The `_P` suffix is for "parameterized" or "pattern", whichever you -prefer to think. - -```c++ -TEST_P(FooTest, DoesBlah) { - // Inside a test, access the test parameter with the GetParam() method - // of the TestWithParam class: - EXPECT_TRUE(foo.Blah(GetParam())); - ... -} - -TEST_P(FooTest, HasBlahBlah) { - ... -} -``` - -Finally, you can use the `INSTANTIATE_TEST_SUITE_P` macro to instantiate the -test suite with any set of parameters you want. GoogleTest defines a number of -functions for generating test parametersβ€”see details at -[`INSTANTIATE_TEST_SUITE_P`](reference/testing.md#INSTANTIATE_TEST_SUITE_P) in -the Testing Reference. - -For example, the following statement will instantiate tests from the `FooTest` -test suite each with parameter values `"meeny"`, `"miny"`, and `"moe"` using the -[`Values`](reference/testing.md#param-generators) parameter generator: - -```c++ -INSTANTIATE_TEST_SUITE_P(MeenyMinyMoe, - FooTest, - testing::Values("meeny", "miny", "moe")); -``` - -{: .callout .note} -NOTE: The code above must be placed at global or namespace scope, not at -function scope. - -The first argument to `INSTANTIATE_TEST_SUITE_P` is a unique name for the -instantiation of the test suite. The next argument is the name of the test -pattern, and the last is the -[parameter generator](reference/testing.md#param-generators). - -The parameter generator expression is not evaluated until GoogleTest is -initialized (via `InitGoogleTest()`). Any prior initialization done in the -`main` function will be accessible from the parameter generator, for example, -the results of flag parsing. - -You can instantiate a test pattern more than once, so to distinguish different -instances of the pattern, the instantiation name is added as a prefix to the -actual test suite name. Remember to pick unique prefixes for different -instantiations. The tests from the instantiation above will have these names: - -* `MeenyMinyMoe/FooTest.DoesBlah/0` for `"meeny"` -* `MeenyMinyMoe/FooTest.DoesBlah/1` for `"miny"` -* `MeenyMinyMoe/FooTest.DoesBlah/2` for `"moe"` -* `MeenyMinyMoe/FooTest.HasBlahBlah/0` for `"meeny"` -* `MeenyMinyMoe/FooTest.HasBlahBlah/1` for `"miny"` -* `MeenyMinyMoe/FooTest.HasBlahBlah/2` for `"moe"` - -You can use these names in [`--gtest_filter`](#running-a-subset-of-the-tests). - -The following statement will instantiate all tests from `FooTest` again, each -with parameter values `"cat"` and `"dog"` using the -[`ValuesIn`](reference/testing.md#param-generators) parameter generator: - -```c++ -const char* pets[] = {"cat", "dog"}; -INSTANTIATE_TEST_SUITE_P(Pets, FooTest, testing::ValuesIn(pets)); -``` - -The tests from the instantiation above will have these names: - -* `Pets/FooTest.DoesBlah/0` for `"cat"` -* `Pets/FooTest.DoesBlah/1` for `"dog"` -* `Pets/FooTest.HasBlahBlah/0` for `"cat"` -* `Pets/FooTest.HasBlahBlah/1` for `"dog"` - -Please note that `INSTANTIATE_TEST_SUITE_P` will instantiate *all* tests in the -given test suite, whether their definitions come before or *after* the -`INSTANTIATE_TEST_SUITE_P` statement. - -Additionally, by default, every `TEST_P` without a corresponding -`INSTANTIATE_TEST_SUITE_P` causes a failing test in test suite -`GoogleTestVerification`. 
If you have a test suite where that omission is not an -error, for example it is in a library that may be linked in for other reasons or -where the list of test cases is dynamic and may be empty, then this check can be -suppressed by tagging the test suite: - -```c++ -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(FooTest); -``` - -You can see [sample7_unittest.cc] and [sample8_unittest.cc] for more examples. - -[sample7_unittest.cc]: https://github.com/google/googletest/blob/main/googletest/samples/sample7_unittest.cc "Parameterized Test example" -[sample8_unittest.cc]: https://github.com/google/googletest/blob/main/googletest/samples/sample8_unittest.cc "Parameterized Test example with multiple parameters" - -### Creating Value-Parameterized Abstract Tests - -In the above, we define and instantiate `FooTest` in the *same* source file. -Sometimes you may want to define value-parameterized tests in a library and let -other people instantiate them later. This pattern is known as *abstract tests*. -As an example of its application, when you are designing an interface you can -write a standard suite of abstract tests (perhaps using a factory function as -the test parameter) that all implementations of the interface are expected to -pass. When someone implements the interface, they can instantiate your suite to -get all the interface-conformance tests for free. - -To define abstract tests, you should organize your code like this: - -1. Put the definition of the parameterized test fixture class (e.g. `FooTest`) - in a header file, say `foo_param_test.h`. Think of this as *declaring* your - abstract tests. -2. Put the `TEST_P` definitions in `foo_param_test.cc`, which includes - `foo_param_test.h`. Think of this as *implementing* your abstract tests. - -Once they are defined, you can instantiate them by including `foo_param_test.h`, -invoking `INSTANTIATE_TEST_SUITE_P()`, and depending on the library target that -contains `foo_param_test.cc`. You can instantiate the same abstract test suite -multiple times, possibly in different source files. - -### Specifying Names for Value-Parameterized Test Parameters - -The optional last argument to `INSTANTIATE_TEST_SUITE_P()` allows the user to -specify a function or functor that generates custom test name suffixes based on -the test parameters. The function should accept one argument of type -`testing::TestParamInfo`, and return `std::string`. - -`testing::PrintToStringParamName` is a builtin test suffix generator that -returns the value of `testing::PrintToString(GetParam())`. It does not work for -`std::string` or C strings. - -{: .callout .note} -NOTE: test names must be non-empty, unique, and may only contain ASCII -alphanumeric characters. In particular, they -[should not contain underscores](faq.md#why-should-test-suite-names-and-test-names-not-contain-underscore) - -```c++ -class MyTestSuite : public testing::TestWithParam {}; - -TEST_P(MyTestSuite, MyTest) -{ - std::cout << "Example Test Param: " << GetParam() << std::endl; -} - -INSTANTIATE_TEST_SUITE_P(MyGroup, MyTestSuite, testing::Range(0, 10), - testing::PrintToStringParamName()); -``` - -Providing a custom functor allows for more control over test parameter name -generation, especially for types where the automatic conversion does not -generate helpful parameter names (e.g. strings as demonstrated above). The -following example illustrates this for multiple parameters, an enumeration type -and a string, and also demonstrates how to combine generators. 
It uses a lambda -for conciseness: - -```c++ -enum class MyType { MY_FOO = 0, MY_BAR = 1 }; - -class MyTestSuite : public testing::TestWithParam> { -}; - -INSTANTIATE_TEST_SUITE_P( - MyGroup, MyTestSuite, - testing::Combine( - testing::Values(MyType::MY_FOO, MyType::MY_BAR), - testing::Values("A", "B")), - [](const testing::TestParamInfo& info) { - std::string name = absl::StrCat( - std::get<0>(info.param) == MyType::MY_FOO ? "Foo" : "Bar", - std::get<1>(info.param)); - absl::c_replace_if(name, [](char c) { return !std::isalnum(c); }, '_'); - return name; - }); -``` - -## Typed Tests - -Suppose you have multiple implementations of the same interface and want to make -sure that all of them satisfy some common requirements. Or, you may have defined -several types that are supposed to conform to the same "concept" and you want to -verify it. In both cases, you want the same test logic repeated for different -types. - -While you can write one `TEST` or `TEST_F` for each type you want to test (and -you may even factor the test logic into a function template that you invoke from -the `TEST`), it's tedious and doesn't scale: if you want `m` tests over `n` -types, you'll end up writing `m*n` `TEST`s. - -*Typed tests* allow you to repeat the same test logic over a list of types. You -only need to write the test logic once, although you must know the type list -when writing typed tests. Here's how you do it: - -First, define a fixture class template. It should be parameterized by a type. -Remember to derive it from `::testing::Test`: - -```c++ -template -class FooTest : public testing::Test { - public: - ... - using List = std::list; - static T shared_; - T value_; -}; -``` - -Next, associate a list of types with the test suite, which will be repeated for -each type in the list: - -```c++ -using MyTypes = ::testing::Types; -TYPED_TEST_SUITE(FooTest, MyTypes); -``` - -The type alias (`using` or `typedef`) is necessary for the `TYPED_TEST_SUITE` -macro to parse correctly. Otherwise the compiler will think that each comma in -the type list introduces a new macro argument. - -Then, use `TYPED_TEST()` instead of `TEST_F()` to define a typed test for this -test suite. You can repeat this as many times as you want: - -```c++ -TYPED_TEST(FooTest, DoesBlah) { - // Inside a test, refer to the special name TypeParam to get the type - // parameter. Since we are inside a derived class template, C++ requires - // us to visit the members of FooTest via 'this'. - TypeParam n = this->value_; - - // To visit static members of the fixture, add the 'TestFixture::' - // prefix. - n += TestFixture::shared_; - - // To refer to typedefs in the fixture, add the 'typename TestFixture::' - // prefix. The 'typename' is required to satisfy the compiler. - typename TestFixture::List values; - - values.push_back(n); - ... -} - -TYPED_TEST(FooTest, HasPropertyA) { ... } -``` - -You can see [sample6_unittest.cc] for a complete example. - -[sample6_unittest.cc]: https://github.com/google/googletest/blob/main/googletest/samples/sample6_unittest.cc "Typed Test example" - -## Type-Parameterized Tests - -*Type-parameterized tests* are like typed tests, except that they don't require -you to know the list of types ahead of time. Instead, you can define the test -logic first and instantiate it with different type lists later. You can even -instantiate it more than once in the same program. 
- -If you are designing an interface or concept, you can define a suite of -type-parameterized tests to verify properties that any valid implementation of -the interface/concept should have. Then, the author of each implementation can -just instantiate the test suite with their type to verify that it conforms to -the requirements, without having to write similar tests repeatedly. Here's an -example: - -First, define a fixture class template, as we did with typed tests: - -```c++ -template -class FooTest : public testing::Test { - void DoSomethingInteresting(); - ... -}; -``` - -Next, declare that you will define a type-parameterized test suite: - -```c++ -TYPED_TEST_SUITE_P(FooTest); -``` - -Then, use `TYPED_TEST_P()` to define a type-parameterized test. You can repeat -this as many times as you want: - -```c++ -TYPED_TEST_P(FooTest, DoesBlah) { - // Inside a test, refer to TypeParam to get the type parameter. - TypeParam n = 0; - - // You will need to use `this` explicitly to refer to fixture members. - this->DoSomethingInteresting() - ... -} - -TYPED_TEST_P(FooTest, HasPropertyA) { ... } -``` - -Now the tricky part: you need to register all test patterns using the -`REGISTER_TYPED_TEST_SUITE_P` macro before you can instantiate them. The first -argument of the macro is the test suite name; the rest are the names of the -tests in this test suite: - -```c++ -REGISTER_TYPED_TEST_SUITE_P(FooTest, - DoesBlah, HasPropertyA); -``` - -Finally, you are free to instantiate the pattern with the types you want. If you -put the above code in a header file, you can `#include` it in multiple C++ -source files and instantiate it multiple times. - -```c++ -using MyTypes = ::testing::Types; -INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, MyTypes); -``` - -To distinguish different instances of the pattern, the first argument to the -`INSTANTIATE_TYPED_TEST_SUITE_P` macro is a prefix that will be added to the -actual test suite name. Remember to pick unique prefixes for different -instances. - -In the special case where the type list contains only one type, you can write -that type directly without `::testing::Types<...>`, like this: - -```c++ -INSTANTIATE_TYPED_TEST_SUITE_P(My, FooTest, int); -``` - -You can see [sample6_unittest.cc] for a complete example. - -## Testing Private Code - -If you change your software's internal implementation, your tests should not -break as long as the change is not observable by users. Therefore, **per the -black-box testing principle, most of the time you should test your code through -its public interfaces.** - -**If you still find yourself needing to test internal implementation code, -consider if there's a better design.** The desire to test internal -implementation is often a sign that the class is doing too much. Consider -extracting an implementation class, and testing it. Then use that implementation -class in the original class. - -If you absolutely have to test non-public interface code though, you can. There -are two cases to consider: - -* Static functions ( *not* the same as static member functions!) or unnamed - namespaces, and -* Private or protected class members - -To test them, we use the following special techniques: - -* Both static functions and definitions/declarations in an unnamed namespace - are only visible within the same translation unit. To test them, you can - `#include` the entire `.cc` file being tested in your `*_test.cc` file. - (#including `.cc` files is not a good way to reuse code - you should not do - this in production code!) 
- - However, a better approach is to move the private code into the - `foo::internal` namespace, where `foo` is the namespace your project - normally uses, and put the private declarations in a `*-internal.h` file. - Your production `.cc` files and your tests are allowed to include this - internal header, but your clients are not. This way, you can fully test your - internal implementation without leaking it to your clients. - -* Private class members are only accessible from within the class or by - friends. To access a class' private members, you can declare your test - fixture as a friend to the class and define accessors in your fixture. Tests - using the fixture can then access the private members of your production - class via the accessors in the fixture. Note that even though your fixture - is a friend to your production class, your tests are not automatically - friends to it, as they are technically defined in sub-classes of the - fixture. - - Another way to test private members is to refactor them into an - implementation class, which is then declared in a `*-internal.h` file. Your - clients aren't allowed to include this header but your tests can. Such is - called the - [Pimpl](https://www.gamedev.net/articles/programming/general-and-gameplay-programming/the-c-pimpl-r1794/) - (Private Implementation) idiom. - - Or, you can declare an individual test as a friend of your class by adding - this line in the class body: - - ```c++ - FRIEND_TEST(TestSuiteName, TestName); - ``` - - For example, - - ```c++ - // foo.h - class Foo { - ... - private: - FRIEND_TEST(FooTest, BarReturnsZeroOnNull); - - int Bar(void* x); - }; - - // foo_test.cc - ... - TEST(FooTest, BarReturnsZeroOnNull) { - Foo foo; - EXPECT_EQ(foo.Bar(NULL), 0); // Uses Foo's private member Bar(). - } - ``` - - Pay special attention when your class is defined in a namespace. If you want - your test fixtures and tests to be friends of your class, then they must be - defined in the exact same namespace (no anonymous or inline namespaces). - - For example, if the code to be tested looks like: - - ```c++ - namespace my_namespace { - - class Foo { - friend class FooTest; - FRIEND_TEST(FooTest, Bar); - FRIEND_TEST(FooTest, Baz); - ... definition of the class Foo ... - }; - - } // namespace my_namespace - ``` - - Your test code should be something like: - - ```c++ - namespace my_namespace { - - class FooTest : public testing::Test { - protected: - ... - }; - - TEST_F(FooTest, Bar) { ... } - TEST_F(FooTest, Baz) { ... } - - } // namespace my_namespace - ``` - -## "Catching" Failures - -If you are building a testing utility on top of googletest, you'll want to test -your utility. What framework would you use to test it? googletest, of course. - -The challenge is to verify that your testing utility reports failures correctly. -In frameworks that report a failure by throwing an exception, you could catch -the exception and assert on it. But googletest doesn't use exceptions, so how do -we test that a piece of code generates an expected failure? - -`"gtest/gtest-spi.h"` contains some constructs to do this. -After #including this header, you can use - -```c++ - EXPECT_FATAL_FAILURE(statement, substring); -``` - -to assert that `statement` generates a fatal (e.g. `ASSERT_*`) failure in the -current thread whose message contains the given `substring`, or use - -```c++ - EXPECT_NONFATAL_FAILURE(statement, substring); -``` - -if you are expecting a non-fatal (e.g. `EXPECT_*`) failure. 
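-
-For instance, a minimal sketch (the `ExpectValidIndex()` helper and its
-failure message are hypothetical stand-ins for the utility being tested):
-
-```c++
-// Hypothetical helper under test: expected to issue a non-fatal failure
-// mentioning "out of range" when given a negative index.
-void ExpectValidIndex(int index);
-
-TEST(ExpectValidIndexTest, FlagsNegativeIndex) {
-  EXPECT_NONFATAL_FAILURE(ExpectValidIndex(-1), "out of range");
-}
-```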
- -Only failures in the current thread are checked to determine the result of this -type of expectations. If `statement` creates new threads, failures in these -threads are also ignored. If you want to catch failures in other threads as -well, use one of the following macros instead: - -```c++ - EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substring); - EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substring); -``` - -{: .callout .note} -NOTE: Assertions from multiple threads are currently not supported on Windows. - -For technical reasons, there are some caveats: - -1. You cannot stream a failure message to either macro. - -2. `statement` in `EXPECT_FATAL_FAILURE{_ON_ALL_THREADS}()` cannot reference - local non-static variables or non-static members of `this` object. - -3. `statement` in `EXPECT_FATAL_FAILURE{_ON_ALL_THREADS}()` cannot return a - value. - -## Registering tests programmatically - -The `TEST` macros handle the vast majority of all use cases, but there are few -where runtime registration logic is required. For those cases, the framework -provides the `::testing::RegisterTest` that allows callers to register arbitrary -tests dynamically. - -This is an advanced API only to be used when the `TEST` macros are insufficient. -The macros should be preferred when possible, as they avoid most of the -complexity of calling this function. - -It provides the following signature: - -```c++ -template -TestInfo* RegisterTest(const char* test_suite_name, const char* test_name, - const char* type_param, const char* value_param, - const char* file, int line, Factory factory); -``` - -The `factory` argument is a factory callable (move-constructible) object or -function pointer that creates a new instance of the Test object. It handles -ownership to the caller. The signature of the callable is `Fixture*()`, where -`Fixture` is the test fixture class for the test. All tests registered with the -same `test_suite_name` must return the same fixture type. This is checked at -runtime. - -The framework will infer the fixture class from the factory and will call the -`SetUpTestSuite` and `TearDownTestSuite` for it. - -Must be called before `RUN_ALL_TESTS()` is invoked, otherwise behavior is -undefined. - -Use case example: - -```c++ -class MyFixture : public testing::Test { - public: - // All of these optional, just like in regular macro usage. - static void SetUpTestSuite() { ... } - static void TearDownTestSuite() { ... } - void SetUp() override { ... } - void TearDown() override { ... } -}; - -class MyTest : public MyFixture { - public: - explicit MyTest(int data) : data_(data) {} - void TestBody() override { ... } - - private: - int data_; -}; - -void RegisterMyTests(const std::vector& values) { - for (int v : values) { - testing::RegisterTest( - "MyFixture", ("Test" + std::to_string(v)).c_str(), nullptr, - std::to_string(v).c_str(), - __FILE__, __LINE__, - // Important to use the fixture type as the return type here. - [=]() -> MyFixture* { return new MyTest(v); }); - } -} -... -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - std::vector values_to_test = LoadValuesFromConfig(); - RegisterMyTests(values_to_test); - ... - return RUN_ALL_TESTS(); -} -``` - -## Getting the Current Test's Name - -Sometimes a function may need to know the name of the currently running test. -For example, you may be using the `SetUp()` method of your test fixture to set -the golden file name based on which test is running. 
The -[`TestInfo`](reference/testing.md#TestInfo) class has this information. - -To obtain a `TestInfo` object for the currently running test, call -`current_test_info()` on the [`UnitTest`](reference/testing.md#UnitTest) -singleton object: - -```c++ - // Gets information about the currently running test. - // Do NOT delete the returned object - it's managed by the UnitTest class. - const testing::TestInfo* const test_info = - testing::UnitTest::GetInstance()->current_test_info(); - - printf("We are in test %s of test suite %s.\n", - test_info->name(), - test_info->test_suite_name()); -``` - -`current_test_info()` returns a null pointer if no test is running. In -particular, you cannot find the test suite name in `SetUpTestSuite()`, -`TearDownTestSuite()` (where you know the test suite name implicitly), or -functions called from them. - -## Extending googletest by Handling Test Events - -googletest provides an **event listener API** to let you receive notifications -about the progress of a test program and test failures. The events you can -listen to include the start and end of the test program, a test suite, or a test -method, among others. You may use this API to augment or replace the standard -console output, replace the XML output, or provide a completely different form -of output, such as a GUI or a database. You can also use test events as -checkpoints to implement a resource leak checker, for example. - -### Defining Event Listeners - -To define a event listener, you subclass either -[`testing::TestEventListener`](reference/testing.md#TestEventListener) or -[`testing::EmptyTestEventListener`](reference/testing.md#EmptyTestEventListener) -The former is an (abstract) interface, where *each pure virtual method can be -overridden to handle a test event* (For example, when a test starts, the -`OnTestStart()` method will be called.). The latter provides an empty -implementation of all methods in the interface, such that a subclass only needs -to override the methods it cares about. - -When an event is fired, its context is passed to the handler function as an -argument. The following argument types are used: - -* UnitTest reflects the state of the entire test program, -* TestSuite has information about a test suite, which can contain one or more - tests, -* TestInfo contains the state of a test, and -* TestPartResult represents the result of a test assertion. - -An event handler function can examine the argument it receives to find out -interesting information about the event and the test program's state. - -Here's an example: - -```c++ - class MinimalistPrinter : public testing::EmptyTestEventListener { - // Called before a test starts. - void OnTestStart(const testing::TestInfo& test_info) override { - printf("*** Test %s.%s starting.\n", - test_info.test_suite_name(), test_info.name()); - } - - // Called after a failed assertion or a SUCCESS(). - void OnTestPartResult(const testing::TestPartResult& test_part_result) override { - printf("%s in %s:%d\n%s\n", - test_part_result.failed() ? "*** Failure" : "Success", - test_part_result.file_name(), - test_part_result.line_number(), - test_part_result.summary()); - } - - // Called after a test ends. 
- void OnTestEnd(const testing::TestInfo& test_info) override { - printf("*** Test %s.%s ending.\n", - test_info.test_suite_name(), test_info.name()); - } - }; -``` - -### Using Event Listeners - -To use the event listener you have defined, add an instance of it to the -googletest event listener list (represented by class -[`TestEventListeners`](reference/testing.md#TestEventListeners) - note the "s" -at the end of the name) in your `main()` function, before calling -`RUN_ALL_TESTS()`: - -```c++ -int main(int argc, char** argv) { - testing::InitGoogleTest(&argc, argv); - // Gets hold of the event listener list. - testing::TestEventListeners& listeners = - testing::UnitTest::GetInstance()->listeners(); - // Adds a listener to the end. googletest takes the ownership. - listeners.Append(new MinimalistPrinter); - return RUN_ALL_TESTS(); -} -``` - -There's only one problem: the default test result printer is still in effect, so -its output will mingle with the output from your minimalist printer. To suppress -the default printer, just release it from the event listener list and delete it. -You can do so by adding one line: - -```c++ - ... - delete listeners.Release(listeners.default_result_printer()); - listeners.Append(new MinimalistPrinter); - return RUN_ALL_TESTS(); -``` - -Now, sit back and enjoy a completely different output from your tests. For more -details, see [sample9_unittest.cc]. - -[sample9_unittest.cc]: https://github.com/google/googletest/blob/main/googletest/samples/sample9_unittest.cc "Event listener example" - -You may append more than one listener to the list. When an `On*Start()` or -`OnTestPartResult()` event is fired, the listeners will receive it in the order -they appear in the list (since new listeners are added to the end of the list, -the default text printer and the default XML generator will receive the event -first). An `On*End()` event will be received by the listeners in the *reverse* -order. This allows output by listeners added later to be framed by output from -listeners added earlier. - -### Generating Failures in Listeners - -You may use failure-raising macros (`EXPECT_*()`, `ASSERT_*()`, `FAIL()`, etc) -when processing an event. There are some restrictions: - -1. You cannot generate any failure in `OnTestPartResult()` (otherwise it will - cause `OnTestPartResult()` to be called recursively). -2. A listener that handles `OnTestPartResult()` is not allowed to generate any - failure. - -When you add listeners to the listener list, you should put listeners that -handle `OnTestPartResult()` *before* listeners that can generate failures. This -ensures that failures generated by the latter are attributed to the right test -by the former. - -See [sample10_unittest.cc] for an example of a failure-raising listener. - -[sample10_unittest.cc]: https://github.com/google/googletest/blob/main/googletest/samples/sample10_unittest.cc "Failure-raising listener example" - -## Running Test Programs: Advanced Options - -googletest test programs are ordinary executables. Once built, you can run them -directly and affect their behavior via the following environment variables -and/or command line flags. For the flags to work, your programs must call -`::testing::InitGoogleTest()` before calling `RUN_ALL_TESTS()`. - -To see a list of supported flags and their usage, please run your test program -with the `--help` flag. You can also use `-h`, `-?`, or `/?` for short. - -If an option is specified both by an environment variable and by a flag, the -latter takes precedence. 
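-
-For example, in the following invocation (a sketch using the usual `foo_test`
-binary), the `--gtest_filter` flag wins over the `GTEST_FILTER` environment
-variable, so only tests from `FooTest` are run:
-
-```none
-$ GTEST_FILTER=BarTest.* ./foo_test --gtest_filter=FooTest.*
-```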
- -### Selecting Tests - -#### Listing Test Names - -Sometimes it is necessary to list the available tests in a program before -running them so that a filter may be applied if needed. Including the flag -`--gtest_list_tests` overrides all other flags and lists tests in the following -format: - -```none -TestSuite1. - TestName1 - TestName2 -TestSuite2. - TestName -``` - -None of the tests listed are actually run if the flag is provided. There is no -corresponding environment variable for this flag. - -#### Running a Subset of the Tests - -By default, a googletest program runs all tests the user has defined. Sometimes, -you want to run only a subset of the tests (e.g. for debugging or quickly -verifying a change). If you set the `GTEST_FILTER` environment variable or the -`--gtest_filter` flag to a filter string, googletest will only run the tests -whose full names (in the form of `TestSuiteName.TestName`) match the filter. - -The format of a filter is a '`:`'-separated list of wildcard patterns (called -the *positive patterns*) optionally followed by a '`-`' and another -'`:`'-separated pattern list (called the *negative patterns*). A test matches -the filter if and only if it matches any of the positive patterns but does not -match any of the negative patterns. - -A pattern may contain `'*'` (matches any string) or `'?'` (matches any single -character). For convenience, the filter `'*-NegativePatterns'` can be also -written as `'-NegativePatterns'`. - -For example: - -* `./foo_test` Has no flag, and thus runs all its tests. -* `./foo_test --gtest_filter=*` Also runs everything, due to the single - match-everything `*` value. -* `./foo_test --gtest_filter=FooTest.*` Runs everything in test suite - `FooTest` . -* `./foo_test --gtest_filter=*Null*:*Constructor*` Runs any test whose full - name contains either `"Null"` or `"Constructor"` . -* `./foo_test --gtest_filter=-*DeathTest.*` Runs all non-death tests. -* `./foo_test --gtest_filter=FooTest.*-FooTest.Bar` Runs everything in test - suite `FooTest` except `FooTest.Bar`. -* `./foo_test --gtest_filter=FooTest.*:BarTest.*-FooTest.Bar:BarTest.Foo` Runs - everything in test suite `FooTest` except `FooTest.Bar` and everything in - test suite `BarTest` except `BarTest.Foo`. - -#### Stop test execution upon first failure - -By default, a googletest program runs all tests the user has defined. In some -cases (e.g. iterative test development & execution) it may be desirable stop -test execution upon first failure (trading improved latency for completeness). -If `GTEST_FAIL_FAST` environment variable or `--gtest_fail_fast` flag is set, -the test runner will stop execution as soon as the first test failure is found. - -#### Temporarily Disabling Tests - -If you have a broken test that you cannot fix right away, you can add the -`DISABLED_` prefix to its name. This will exclude it from execution. This is -better than commenting out the code or using `#if 0`, as disabled tests are -still compiled (and thus won't rot). - -If you need to disable all tests in a test suite, you can either add `DISABLED_` -to the front of the name of each test, or alternatively add it to the front of -the test suite name. - -For example, the following tests won't be run by googletest, even though they -will still be compiled: - -```c++ -// Tests that Foo does Abc. -TEST(FooTest, DISABLED_DoesAbc) { ... } - -class DISABLED_BarTest : public testing::Test { ... }; - -// Tests that Bar does Xyz. -TEST_F(DISABLED_BarTest, DoesXyz) { ... 
} -``` - -{: .callout .note} -NOTE: This feature should only be used for temporary pain-relief. You still have -to fix the disabled tests at a later date. As a reminder, googletest will print -a banner warning you if a test program contains any disabled tests. - -{: .callout .tip} -TIP: You can easily count the number of disabled tests you have using -`grep`. This number can be used as a metric for -improving your test quality. - -#### Temporarily Enabling Disabled Tests - -To include disabled tests in test execution, just invoke the test program with -the `--gtest_also_run_disabled_tests` flag or set the -`GTEST_ALSO_RUN_DISABLED_TESTS` environment variable to a value other than `0`. -You can combine this with the `--gtest_filter` flag to further select which -disabled tests to run. - -### Repeating the Tests - -Once in a while you'll run into a test whose result is hit-or-miss. Perhaps it -will fail only 1% of the time, making it rather hard to reproduce the bug under -a debugger. This can be a major source of frustration. - -The `--gtest_repeat` flag allows you to repeat all (or selected) test methods in -a program many times. Hopefully, a flaky test will eventually fail and give you -a chance to debug. Here's how to use it: - -```none -$ foo_test --gtest_repeat=1000 -Repeat foo_test 1000 times and don't stop at failures. - -$ foo_test --gtest_repeat=-1 -A negative count means repeating forever. - -$ foo_test --gtest_repeat=1000 --gtest_break_on_failure -Repeat foo_test 1000 times, stopping at the first failure. This -is especially useful when running under a debugger: when the test -fails, it will drop into the debugger and you can then inspect -variables and stacks. - -$ foo_test --gtest_repeat=1000 --gtest_filter=FooBar.* -Repeat the tests whose name matches the filter 1000 times. -``` - -If your test program contains -[global set-up/tear-down](#global-set-up-and-tear-down) code, it will be -repeated in each iteration as well, as the flakiness may be in it. To avoid -repeating global set-up/tear-down, specify -`--gtest_recreate_environments_when_repeating=false`{.nowrap}. - -You can also specify the repeat count by setting the `GTEST_REPEAT` environment -variable. - -### Shuffling the Tests - -You can specify the `--gtest_shuffle` flag (or set the `GTEST_SHUFFLE` -environment variable to `1`) to run the tests in a program in a random order. -This helps to reveal bad dependencies between tests. - -By default, googletest uses a random seed calculated from the current time. -Therefore you'll get a different order every time. The console output includes -the random seed value, such that you can reproduce an order-related test failure -later. To specify the random seed explicitly, use the `--gtest_random_seed=SEED` -flag (or set the `GTEST_RANDOM_SEED` environment variable), where `SEED` is an -integer in the range [0, 99999]. The seed value 0 is special: it tells -googletest to do the default behavior of calculating the seed from the current -time. - -If you combine this with `--gtest_repeat=N`, googletest will pick a different -random seed and re-shuffle the tests in each iteration. - -### Distributing Test Functions to Multiple Machines - -If you have more than one machine you can use to run a test program, you might -want to run the test functions in parallel and get the result faster. We call -this technique *sharding*, where each machine is called a *shard*. - -GoogleTest is compatible with test sharding. 
To take advantage of this feature, -your test runner (not part of GoogleTest) needs to do the following: - -1. Allocate a number of machines (shards) to run the tests. -1. On each shard, set the `GTEST_TOTAL_SHARDS` environment variable to the total - number of shards. It must be the same for all shards. -1. On each shard, set the `GTEST_SHARD_INDEX` environment variable to the index - of the shard. Different shards must be assigned different indices, which - must be in the range `[0, GTEST_TOTAL_SHARDS - 1]`. -1. Run the same test program on all shards. When GoogleTest sees the above two - environment variables, it will select a subset of the test functions to run. - Across all shards, each test function in the program will be run exactly - once. -1. Wait for all shards to finish, then collect and report the results. - -Your project may have tests that were written without GoogleTest and thus don't -understand this protocol. In order for your test runner to figure out which test -supports sharding, it can set the environment variable `GTEST_SHARD_STATUS_FILE` -to a non-existent file path. If a test program supports sharding, it will create -this file to acknowledge that fact; otherwise it will not create it. The actual -contents of the file are not important at this time, although we may put some -useful information in it in the future. - -Here's an example to make it clear. Suppose you have a test program `foo_test` -that contains the following 5 test functions: - -``` -TEST(A, V) -TEST(A, W) -TEST(B, X) -TEST(B, Y) -TEST(B, Z) -``` - -Suppose you have 3 machines at your disposal. To run the test functions in -parallel, you would set `GTEST_TOTAL_SHARDS` to 3 on all machines, and set -`GTEST_SHARD_INDEX` to 0, 1, and 2 on the machines respectively. Then you would -run the same `foo_test` on each machine. - -GoogleTest reserves the right to change how the work is distributed across the -shards, but here's one possible scenario: - -* Machine #0 runs `A.V` and `B.X`. -* Machine #1 runs `A.W` and `B.Y`. -* Machine #2 runs `B.Z`. - -### Controlling Test Output - -#### Colored Terminal Output - -googletest can use colors in its terminal output to make it easier to spot the -important information: - -
...
-[----------] 1 test from FooTest
-[ RUN      ] FooTest.DoesAbc
-[       OK ] FooTest.DoesAbc
-[----------] 2 tests from BarTest
-[ RUN      ] BarTest.HasXyzProperty
-[       OK ] BarTest.HasXyzProperty
-[ RUN      ] BarTest.ReturnsTrueOnSuccess
-... some error messages ...
-[   FAILED ] BarTest.ReturnsTrueOnSuccess
-...
-[==========] 30 tests from 14 test suites ran.
-[   PASSED ] 28 tests.
-[   FAILED ] 2 tests, listed below:
-[   FAILED ] BarTest.ReturnsTrueOnSuccess
-[   FAILED ] AnotherTest.DoesXyz
-
- 2 FAILED TESTS
-
- -You can set the `GTEST_COLOR` environment variable or the `--gtest_color` -command line flag to `yes`, `no`, or `auto` (the default) to enable colors, -disable colors, or let googletest decide. When the value is `auto`, googletest -will use colors if and only if the output goes to a terminal and (on non-Windows -platforms) the `TERM` environment variable is set to `xterm` or `xterm-color`. - -#### Suppressing test passes - -By default, googletest prints 1 line of output for each test, indicating if it -passed or failed. To show only test failures, run the test program with -`--gtest_brief=1`, or set the GTEST_BRIEF environment variable to `1`. - -#### Suppressing the Elapsed Time - -By default, googletest prints the time it takes to run each test. To disable -that, run the test program with the `--gtest_print_time=0` command line flag, or -set the GTEST_PRINT_TIME environment variable to `0`. - -#### Suppressing UTF-8 Text Output - -In case of assertion failures, googletest prints expected and actual values of -type `string` both as hex-encoded strings as well as in readable UTF-8 text if -they contain valid non-ASCII UTF-8 characters. If you want to suppress the UTF-8 -text because, for example, you don't have an UTF-8 compatible output medium, run -the test program with `--gtest_print_utf8=0` or set the `GTEST_PRINT_UTF8` -environment variable to `0`. - -#### Generating an XML Report - -googletest can emit a detailed XML report to a file in addition to its normal -textual output. The report contains the duration of each test, and thus can help -you identify slow tests. - -To generate the XML report, set the `GTEST_OUTPUT` environment variable or the -`--gtest_output` flag to the string `"xml:path_to_output_file"`, which will -create the file at the given location. You can also just use the string `"xml"`, -in which case the output can be found in the `test_detail.xml` file in the -current directory. - -If you specify a directory (for example, `"xml:output/directory/"` on Linux or -`"xml:output\directory\"` on Windows), googletest will create the XML file in -that directory, named after the test executable (e.g. `foo_test.xml` for test -program `foo_test` or `foo_test.exe`). If the file already exists (perhaps left -over from a previous run), googletest will pick a different name (e.g. -`foo_test_1.xml`) to avoid overwriting it. - -The report is based on the `junitreport` Ant task. Since that format was -originally intended for Java, a little interpretation is required to make it -apply to googletest tests, as shown here: - -```xml - - - - - - - - - -``` - -* The root `` element corresponds to the entire test program. -* `` elements correspond to googletest test suites. -* `` elements correspond to googletest test functions. - -For instance, the following program - -```c++ -TEST(MathTest, Addition) { ... } -TEST(MathTest, Subtraction) { ... } -TEST(LogicTest, NonContradiction) { ... } -``` - -could generate this report: - -```xml - - - - - ... - ... - - - - - - - - - -``` - -Things to note: - -* The `tests` attribute of a `` or `` element tells how - many test functions the googletest program or test suite contains, while the - `failures` attribute tells how many of them failed. - -* The `time` attribute expresses the duration of the test, test suite, or - entire test program in seconds. - -* The `timestamp` attribute records the local date and time of the test - execution. - -* The `file` and `line` attributes record the source file location, where the - test was defined. 
- -* Each `` element corresponds to a single failed googletest - assertion. - -#### Generating a JSON Report - -googletest can also emit a JSON report as an alternative format to XML. To -generate the JSON report, set the `GTEST_OUTPUT` environment variable or the -`--gtest_output` flag to the string `"json:path_to_output_file"`, which will -create the file at the given location. You can also just use the string -`"json"`, in which case the output can be found in the `test_detail.json` file -in the current directory. - -The report format conforms to the following JSON Schema: - -```json -{ - "$schema": "http://json-schema.org/schema#", - "type": "object", - "definitions": { - "TestCase": { - "type": "object", - "properties": { - "name": { "type": "string" }, - "tests": { "type": "integer" }, - "failures": { "type": "integer" }, - "disabled": { "type": "integer" }, - "time": { "type": "string" }, - "testsuite": { - "type": "array", - "items": { - "$ref": "#/definitions/TestInfo" - } - } - } - }, - "TestInfo": { - "type": "object", - "properties": { - "name": { "type": "string" }, - "file": { "type": "string" }, - "line": { "type": "integer" }, - "status": { - "type": "string", - "enum": ["RUN", "NOTRUN"] - }, - "time": { "type": "string" }, - "classname": { "type": "string" }, - "failures": { - "type": "array", - "items": { - "$ref": "#/definitions/Failure" - } - } - } - }, - "Failure": { - "type": "object", - "properties": { - "failures": { "type": "string" }, - "type": { "type": "string" } - } - } - }, - "properties": { - "tests": { "type": "integer" }, - "failures": { "type": "integer" }, - "disabled": { "type": "integer" }, - "errors": { "type": "integer" }, - "timestamp": { - "type": "string", - "format": "date-time" - }, - "time": { "type": "string" }, - "name": { "type": "string" }, - "testsuites": { - "type": "array", - "items": { - "$ref": "#/definitions/TestCase" - } - } - } -} -``` - -The report uses the format that conforms to the following Proto3 using the -[JSON encoding](https://developers.google.com/protocol-buffers/docs/proto3#json): - -```proto -syntax = "proto3"; - -package googletest; - -import "google/protobuf/timestamp.proto"; -import "google/protobuf/duration.proto"; - -message UnitTest { - int32 tests = 1; - int32 failures = 2; - int32 disabled = 3; - int32 errors = 4; - google.protobuf.Timestamp timestamp = 5; - google.protobuf.Duration time = 6; - string name = 7; - repeated TestCase testsuites = 8; -} - -message TestCase { - string name = 1; - int32 tests = 2; - int32 failures = 3; - int32 disabled = 4; - int32 errors = 5; - google.protobuf.Duration time = 6; - repeated TestInfo testsuite = 7; -} - -message TestInfo { - string name = 1; - string file = 6; - int32 line = 7; - enum Status { - RUN = 0; - NOTRUN = 1; - } - Status status = 2; - google.protobuf.Duration time = 3; - string classname = 4; - message Failure { - string failures = 1; - string type = 2; - } - repeated Failure failures = 5; -} -``` - -For instance, the following program - -```c++ -TEST(MathTest, Addition) { ... } -TEST(MathTest, Subtraction) { ... } -TEST(LogicTest, NonContradiction) { ... 
} -``` - -could generate this report: - -```json -{ - "tests": 3, - "failures": 1, - "errors": 0, - "time": "0.035s", - "timestamp": "2011-10-31T18:52:42Z", - "name": "AllTests", - "testsuites": [ - { - "name": "MathTest", - "tests": 2, - "failures": 1, - "errors": 0, - "time": "0.015s", - "testsuite": [ - { - "name": "Addition", - "file": "test.cpp", - "line": 1, - "status": "RUN", - "time": "0.007s", - "classname": "", - "failures": [ - { - "message": "Value of: add(1, 1)\n Actual: 3\nExpected: 2", - "type": "" - }, - { - "message": "Value of: add(1, -1)\n Actual: 1\nExpected: 0", - "type": "" - } - ] - }, - { - "name": "Subtraction", - "file": "test.cpp", - "line": 2, - "status": "RUN", - "time": "0.005s", - "classname": "" - } - ] - }, - { - "name": "LogicTest", - "tests": 1, - "failures": 0, - "errors": 0, - "time": "0.005s", - "testsuite": [ - { - "name": "NonContradiction", - "file": "test.cpp", - "line": 3, - "status": "RUN", - "time": "0.005s", - "classname": "" - } - ] - } - ] -} -``` - -{: .callout .important} -IMPORTANT: The exact format of the JSON document is subject to change. - -### Controlling How Failures Are Reported - -#### Detecting Test Premature Exit - -Google Test implements the _premature-exit-file_ protocol for test runners to -catch any kind of unexpected exits of test programs. Upon start, Google Test -creates the file which will be automatically deleted after all work has been -finished. Then, the test runner can check if this file exists. In case the file -remains undeleted, the inspected test has exited prematurely. - -This feature is enabled only if the `TEST_PREMATURE_EXIT_FILE` environment -variable has been set. - -#### Turning Assertion Failures into Break-Points - -When running test programs under a debugger, it's very convenient if the -debugger can catch an assertion failure and automatically drop into interactive -mode. googletest's *break-on-failure* mode supports this behavior. - -To enable it, set the `GTEST_BREAK_ON_FAILURE` environment variable to a value -other than `0`. Alternatively, you can use the `--gtest_break_on_failure` -command line flag. - -#### Disabling Catching Test-Thrown Exceptions - -googletest can be used either with or without exceptions enabled. If a test -throws a C++ exception or (on Windows) a structured exception (SEH), by default -googletest catches it, reports it as a test failure, and continues with the next -test method. This maximizes the coverage of a test run. Also, on Windows an -uncaught exception will cause a pop-up window, so catching the exceptions allows -you to run the tests automatically. - -When debugging the test failures, however, you may instead want the exceptions -to be handled by the debugger, such that you can examine the call stack when an -exception is thrown. To achieve that, set the `GTEST_CATCH_EXCEPTIONS` -environment variable to `0`, or use the `--gtest_catch_exceptions=0` flag when -running the tests. - -### Sanitizer Integration - -The -[Undefined Behavior Sanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html), -[Address Sanitizer](https://github.com/google/sanitizers/wiki/AddressSanitizer), -and -[Thread Sanitizer](https://github.com/google/sanitizers/wiki/ThreadSanitizerCppManual) -all provide weak functions that you can override to trigger explicit failures -when they detect sanitizer errors, such as creating a reference from `nullptr`. 
-To override these functions, place definitions for them in a source file that -you compile as part of your main binary: - -``` -extern "C" { -void __ubsan_on_report() { - FAIL() << "Encountered an undefined behavior sanitizer error"; -} -void __asan_on_error() { - FAIL() << "Encountered an address sanitizer error"; -} -void __tsan_on_report() { - FAIL() << "Encountered a thread sanitizer error"; -} -} // extern "C" -``` - -After compiling your project with one of the sanitizers enabled, if a particular -test triggers a sanitizer error, googletest will report that it failed. diff --git a/3rdparty/googletest-1.13.0/docs/assets/css/style.scss b/3rdparty/googletest-1.13.0/docs/assets/css/style.scss deleted file mode 100644 index bb30f418da7b92d8eaa1fd63caac99aa0576e91c..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/assets/css/style.scss +++ /dev/null @@ -1,5 +0,0 @@ ---- ---- - -@import "jekyll-theme-primer"; -@import "main"; diff --git a/3rdparty/googletest-1.13.0/docs/community_created_documentation.md b/3rdparty/googletest-1.13.0/docs/community_created_documentation.md deleted file mode 100644 index 4569075ff23be385fce1656a8db2f77631d00d42..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/community_created_documentation.md +++ /dev/null @@ -1,7 +0,0 @@ -# Community-Created Documentation - -The following is a list, in no particular order, of links to documentation -created by the Googletest community. - -* [Googlemock Insights](https://github.com/ElectricRCAircraftGuy/eRCaGuy_dotfiles/blob/master/googletest/insights.md), - by [ElectricRCAircraftGuy](https://github.com/ElectricRCAircraftGuy) diff --git a/3rdparty/googletest-1.13.0/docs/faq.md b/3rdparty/googletest-1.13.0/docs/faq.md deleted file mode 100644 index 1928097292a238a81269a01c8e6bd16c96c61b9b..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/faq.md +++ /dev/null @@ -1,692 +0,0 @@ -# GoogleTest FAQ - -## Why should test suite names and test names not contain underscore? - -{: .callout .note} -Note: GoogleTest reserves underscore (`_`) for special purpose keywords, such as -[the `DISABLED_` prefix](advanced.md#temporarily-disabling-tests), in addition -to the following rationale. - -Underscore (`_`) is special, as C++ reserves the following to be used by the -compiler and the standard library: - -1. any identifier that starts with an `_` followed by an upper-case letter, and -2. any identifier that contains two consecutive underscores (i.e. `__`) - *anywhere* in its name. - -User code is *prohibited* from using such identifiers. - -Now let's look at what this means for `TEST` and `TEST_F`. - -Currently `TEST(TestSuiteName, TestName)` generates a class named -`TestSuiteName_TestName_Test`. What happens if `TestSuiteName` or `TestName` -contains `_`? - -1. If `TestSuiteName` starts with an `_` followed by an upper-case letter (say, - `_Foo`), we end up with `_Foo_TestName_Test`, which is reserved and thus - invalid. -2. If `TestSuiteName` ends with an `_` (say, `Foo_`), we get - `Foo__TestName_Test`, which is invalid. -3. If `TestName` starts with an `_` (say, `_Bar`), we get - `TestSuiteName__Bar_Test`, which is invalid. -4. If `TestName` ends with an `_` (say, `Bar_`), we get - `TestSuiteName_Bar__Test`, which is invalid. - -So clearly `TestSuiteName` and `TestName` cannot start or end with `_` -(Actually, `TestSuiteName` can start with `_` -- as long as the `_` isn't -followed by an upper-case letter. But that's getting complicated. 
So for -simplicity we just say that it cannot start with `_`.). - -It may seem fine for `TestSuiteName` and `TestName` to contain `_` in the -middle. However, consider this: - -```c++ -TEST(Time, Flies_Like_An_Arrow) { ... } -TEST(Time_Flies, Like_An_Arrow) { ... } -``` - -Now, the two `TEST`s will both generate the same class -(`Time_Flies_Like_An_Arrow_Test`). That's not good. - -So for simplicity, we just ask the users to avoid `_` in `TestSuiteName` and -`TestName`. The rule is more constraining than necessary, but it's simple and -easy to remember. It also gives GoogleTest some wiggle room in case its -implementation needs to change in the future. - -If you violate the rule, there may not be immediate consequences, but your test -may (just may) break with a new compiler (or a new version of the compiler you -are using) or with a new version of GoogleTest. Therefore it's best to follow -the rule. - -## Why does GoogleTest support `EXPECT_EQ(NULL, ptr)` and `ASSERT_EQ(NULL, ptr)` but not `EXPECT_NE(NULL, ptr)` and `ASSERT_NE(NULL, ptr)`? - -First of all, you can use `nullptr` with each of these macros, e.g. -`EXPECT_EQ(ptr, nullptr)`, `EXPECT_NE(ptr, nullptr)`, `ASSERT_EQ(ptr, nullptr)`, -`ASSERT_NE(ptr, nullptr)`. This is the preferred syntax in the style guide -because `nullptr` does not have the type problems that `NULL` does. - -Due to some peculiarity of C++, it requires some non-trivial template meta -programming tricks to support using `NULL` as an argument of the `EXPECT_XX()` -and `ASSERT_XX()` macros. Therefore we only do it where it's most needed -(otherwise we make the implementation of GoogleTest harder to maintain and more -error-prone than necessary). - -Historically, the `EXPECT_EQ()` macro took the *expected* value as its first -argument and the *actual* value as the second, though this argument order is now -discouraged. It was reasonable that someone wanted -to write `EXPECT_EQ(NULL, some_expression)`, and this indeed was requested -several times. Therefore we implemented it. - -The need for `EXPECT_NE(NULL, ptr)` wasn't nearly as strong. When the assertion -fails, you already know that `ptr` must be `NULL`, so it doesn't add any -information to print `ptr` in this case. That means `EXPECT_TRUE(ptr != NULL)` -works just as well. - -If we were to support `EXPECT_NE(NULL, ptr)`, for consistency we'd have to -support `EXPECT_NE(ptr, NULL)` as well. This means using the template meta -programming tricks twice in the implementation, making it even harder to -understand and maintain. We believe the benefit doesn't justify the cost. - -Finally, with the growth of the gMock matcher library, we are encouraging people -to use the unified `EXPECT_THAT(value, matcher)` syntax more often in tests. One -significant advantage of the matcher approach is that matchers can be easily -combined to form new matchers, while the `EXPECT_NE`, etc, macros cannot be -easily combined. Therefore we want to invest more in the matchers than in the -`EXPECT_XX()` macros. - -## I need to test that different implementations of an interface satisfy some common requirements. Should I use typed tests or value-parameterized tests? - -For testing various implementations of the same interface, either typed tests or -value-parameterized tests can get it done. It's really up to you the user to -decide which is more convenient for you, depending on your particular case. 
Some -rough guidelines: - -* Typed tests can be easier to write if instances of the different - implementations can be created the same way, modulo the type. For example, - if all these implementations have a public default constructor (such that - you can write `new TypeParam`), or if their factory functions have the same - form (e.g. `CreateInstance()`). -* Value-parameterized tests can be easier to write if you need different code - patterns to create different implementations' instances, e.g. `new Foo` vs - `new Bar(5)`. To accommodate for the differences, you can write factory - function wrappers and pass these function pointers to the tests as their - parameters. -* When a typed test fails, the default output includes the name of the type, - which can help you quickly identify which implementation is wrong. - Value-parameterized tests only show the number of the failed iteration by - default. You will need to define a function that returns the iteration name - and pass it as the third parameter to INSTANTIATE_TEST_SUITE_P to have more - useful output. -* When using typed tests, you need to make sure you are testing against the - interface type, not the concrete types (in other words, you want to make - sure `implicit_cast(my_concrete_impl)` works, not just that - `my_concrete_impl` works). It's less likely to make mistakes in this area - when using value-parameterized tests. - -I hope I didn't confuse you more. :-) If you don't mind, I'd suggest you to give -both approaches a try. Practice is a much better way to grasp the subtle -differences between the two tools. Once you have some concrete experience, you -can much more easily decide which one to use the next time. - -## I got some run-time errors about invalid proto descriptors when using `ProtocolMessageEquals`. Help! - -{: .callout .note} -**Note:** `ProtocolMessageEquals` and `ProtocolMessageEquiv` are *deprecated* -now. Please use `EqualsProto`, etc instead. - -`ProtocolMessageEquals` and `ProtocolMessageEquiv` were redefined recently and -are now less tolerant of invalid protocol buffer definitions. In particular, if -you have a `foo.proto` that doesn't fully qualify the type of a protocol message -it references (e.g. `message` where it should be `message`), you -will now get run-time errors like: - -``` -... descriptor.cc:...] Invalid proto descriptor for file "path/to/foo.proto": -... descriptor.cc:...] blah.MyMessage.my_field: ".Bar" is not defined. -``` - -If you see this, your `.proto` file is broken and needs to be fixed by making -the types fully qualified. The new definition of `ProtocolMessageEquals` and -`ProtocolMessageEquiv` just happen to reveal your bug. - -## My death test modifies some state, but the change seems lost after the death test finishes. Why? - -Death tests (`EXPECT_DEATH`, etc) are executed in a sub-process s.t. the -expected crash won't kill the test program (i.e. the parent process). As a -result, any in-memory side effects they incur are observable in their respective -sub-processes, but not in the parent process. You can think of them as running -in a parallel universe, more or less. - -In particular, if you use mocking and the death test statement invokes some mock -methods, the parent process will think the calls have never occurred. Therefore, -you may want to move your `EXPECT_CALL` statements inside the `EXPECT_DEATH` -macro. - -## EXPECT_EQ(htonl(blah), blah_blah) generates weird compiler errors in opt mode. Is this a GoogleTest bug? - -Actually, the bug is in `htonl()`. 
- -According to `'man htonl'`, `htonl()` is a *function*, which means it's valid to -use `htonl` as a function pointer. However, in opt mode `htonl()` is defined as -a *macro*, which breaks this usage. - -Worse, the macro definition of `htonl()` uses a `gcc` extension and is *not* -standard C++. That hacky implementation has some ad hoc limitations. In -particular, it prevents you from writing `Foo()`, where `Foo` -is a template that has an integral argument. - -The implementation of `EXPECT_EQ(a, b)` uses `sizeof(... a ...)` inside a -template argument, and thus doesn't compile in opt mode when `a` contains a call -to `htonl()`. It is difficult to make `EXPECT_EQ` bypass the `htonl()` bug, as -the solution must work with different compilers on various platforms. - -## The compiler complains about "undefined references" to some static const member variables, but I did define them in the class body. What's wrong? - -If your class has a static data member: - -```c++ -// foo.h -class Foo { - ... - static const int kBar = 100; -}; -``` - -You also need to define it *outside* of the class body in `foo.cc`: - -```c++ -const int Foo::kBar; // No initializer here. -``` - -Otherwise your code is **invalid C++**, and may break in unexpected ways. In -particular, using it in GoogleTest comparison assertions (`EXPECT_EQ`, etc) will -generate an "undefined reference" linker error. The fact that "it used to work" -doesn't mean it's valid. It just means that you were lucky. :-) - -If the declaration of the static data member is `constexpr` then it is -implicitly an `inline` definition, and a separate definition in `foo.cc` is not -needed: - -```c++ -// foo.h -class Foo { - ... - static constexpr int kBar = 100; // Defines kBar, no need to do it in foo.cc. -}; -``` - -## Can I derive a test fixture from another? - -Yes. - -Each test fixture has a corresponding and same named test suite. This means only -one test suite can use a particular fixture. Sometimes, however, multiple test -cases may want to use the same or slightly different fixtures. For example, you -may want to make sure that all of a GUI library's test suites don't leak -important system resources like fonts and brushes. - -In GoogleTest, you share a fixture among test suites by putting the shared logic -in a base test fixture, then deriving from that base a separate fixture for each -test suite that wants to use this common logic. You then use `TEST_F()` to write -tests using each derived fixture. - -Typically, your code looks like this: - -```c++ -// Defines a base test fixture. -class BaseTest : public ::testing::Test { - protected: - ... -}; - -// Derives a fixture FooTest from BaseTest. -class FooTest : public BaseTest { - protected: - void SetUp() override { - BaseTest::SetUp(); // Sets up the base fixture first. - ... additional set-up work ... - } - - void TearDown() override { - ... clean-up work for FooTest ... - BaseTest::TearDown(); // Remember to tear down the base fixture - // after cleaning up FooTest! - } - - ... functions and variables for FooTest ... -}; - -// Tests that use the fixture FooTest. -TEST_F(FooTest, Bar) { ... } -TEST_F(FooTest, Baz) { ... } - -... additional fixtures derived from BaseTest ... -``` - -If necessary, you can continue to derive test fixtures from a derived fixture. -GoogleTest has no limit on how deep the hierarchy can be. - -For a complete example using derived test fixtures, see -[sample5_unittest.cc](https://github.com/google/googletest/blob/main/googletest/samples/sample5_unittest.cc). 
- -## My compiler complains "void value not ignored as it ought to be." What does this mean? - -You're probably using an `ASSERT_*()` in a function that doesn't return `void`. -`ASSERT_*()` can only be used in `void` functions, due to exceptions being -disabled by our build system. Please see more details -[here](advanced.md#assertion-placement). - -## My death test hangs (or seg-faults). How do I fix it? - -In GoogleTest, death tests are run in a child process and the way they work is -delicate. To write death tests you really need to understand how they workβ€”see -the details at [Death Assertions](reference/assertions.md#death) in the -Assertions Reference. - -In particular, death tests don't like having multiple threads in the parent -process. So the first thing you can try is to eliminate creating threads outside -of `EXPECT_DEATH()`. For example, you may want to use mocks or fake objects -instead of real ones in your tests. - -Sometimes this is impossible as some library you must use may be creating -threads before `main()` is even reached. In this case, you can try to minimize -the chance of conflicts by either moving as many activities as possible inside -`EXPECT_DEATH()` (in the extreme case, you want to move everything inside), or -leaving as few things as possible in it. Also, you can try to set the death test -style to `"threadsafe"`, which is safer but slower, and see if it helps. - -If you go with thread-safe death tests, remember that they rerun the test -program from the beginning in the child process. Therefore make sure your -program can run side-by-side with itself and is deterministic. - -In the end, this boils down to good concurrent programming. You have to make -sure that there are no race conditions or deadlocks in your program. No silver -bullet - sorry! - -## Should I use the constructor/destructor of the test fixture or SetUp()/TearDown()? {#CtorVsSetUp} - -The first thing to remember is that GoogleTest does **not** reuse the same test -fixture object across multiple tests. For each `TEST_F`, GoogleTest will create -a **fresh** test fixture object, immediately call `SetUp()`, run the test body, -call `TearDown()`, and then delete the test fixture object. - -When you need to write per-test set-up and tear-down logic, you have the choice -between using the test fixture constructor/destructor or `SetUp()/TearDown()`. -The former is usually preferred, as it has the following benefits: - -* By initializing a member variable in the constructor, we have the option to - make it `const`, which helps prevent accidental changes to its value and - makes the tests more obviously correct. -* In case we need to subclass the test fixture class, the subclass' - constructor is guaranteed to call the base class' constructor *first*, and - the subclass' destructor is guaranteed to call the base class' destructor - *afterward*. With `SetUp()/TearDown()`, a subclass may make the mistake of - forgetting to call the base class' `SetUp()/TearDown()` or call them at the - wrong time. - -You may still want to use `SetUp()/TearDown()` in the following cases: - -* C++ does not allow virtual function calls in constructors and destructors. - You can call a method declared as virtual, but it will not use dynamic - dispatch. It will use the definition from the class the constructor of which - is currently executing. This is because calling a virtual method before the - derived class constructor has a chance to run is very dangerous - the - virtual method might operate on uninitialized data. 
Therefore, if you need - to call a method that will be overridden in a derived class, you have to use - `SetUp()/TearDown()`. -* In the body of a constructor (or destructor), it's not possible to use the - `ASSERT_xx` macros. Therefore, if the set-up operation could cause a fatal - test failure that should prevent the test from running, it's necessary to - use `abort` and abort the whole test - executable, or to use `SetUp()` instead of a constructor. -* If the tear-down operation could throw an exception, you must use - `TearDown()` as opposed to the destructor, as throwing in a destructor leads - to undefined behavior and usually will kill your program right away. Note - that many standard libraries (like STL) may throw when exceptions are - enabled in the compiler. Therefore you should prefer `TearDown()` if you - want to write portable tests that work with or without exceptions. -* The GoogleTest team is considering making the assertion macros throw on - platforms where exceptions are enabled (e.g. Windows, Mac OS, and Linux - client-side), which will eliminate the need for the user to propagate - failures from a subroutine to its caller. Therefore, you shouldn't use - GoogleTest assertions in a destructor if your code could run on such a - platform. - -## The compiler complains "no matching function to call" when I use ASSERT_PRED*. How do I fix it? - -See details for [`EXPECT_PRED*`](reference/assertions.md#EXPECT_PRED) in the -Assertions Reference. - -## My compiler complains about "ignoring return value" when I call RUN_ALL_TESTS(). Why? - -Some people had been ignoring the return value of `RUN_ALL_TESTS()`. That is, -instead of - -```c++ - return RUN_ALL_TESTS(); -``` - -they write - -```c++ - RUN_ALL_TESTS(); -``` - -This is **wrong and dangerous**. The testing services needs to see the return -value of `RUN_ALL_TESTS()` in order to determine if a test has passed. If your -`main()` function ignores it, your test will be considered successful even if it -has a GoogleTest assertion failure. Very bad. - -We have decided to fix this (thanks to Michael Chastain for the idea). Now, your -code will no longer be able to ignore `RUN_ALL_TESTS()` when compiled with -`gcc`. If you do so, you'll get a compiler error. - -If you see the compiler complaining about you ignoring the return value of -`RUN_ALL_TESTS()`, the fix is simple: just make sure its value is used as the -return value of `main()`. - -But how could we introduce a change that breaks existing tests? Well, in this -case, the code was already broken in the first place, so we didn't break it. :-) - -## My compiler complains that a constructor (or destructor) cannot return a value. What's going on? - -Due to a peculiarity of C++, in order to support the syntax for streaming -messages to an `ASSERT_*`, e.g. - -```c++ - ASSERT_EQ(1, Foo()) << "blah blah" << foo; -``` - -we had to give up using `ASSERT*` and `FAIL*` (but not `EXPECT*` and -`ADD_FAILURE*`) in constructors and destructors. The workaround is to move the -content of your constructor/destructor to a private void member function, or -switch to `EXPECT_*()` if that works. This -[section](advanced.md#assertion-placement) in the user's guide explains it. - -## My SetUp() function is not called. Why? - -C++ is case-sensitive. Did you spell it as `Setup()`? - -Similarly, sometimes people spell `SetUpTestSuite()` as `SetupTestSuite()` and -wonder why it's never called. 
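A simple way to catch this kind of misspelling at compile time is to mark the method with `override`. Below is a minimal sketch (the fixture name is hypothetical): if the name is misspelled, nothing in the base class matches it and the compiler rejects the `override`.

```c++
class FooTest : public ::testing::Test {
 protected:
  // Correct spelling: this overrides testing::Test::SetUp and runs before each test.
  void SetUp() override { /* per-test set-up */ }

  // A misspelled version would fail to compile, because testing::Test has no
  // virtual member named Setup() for it to override:
  //   void Setup() override { ... }  // error: does not override a base class member
};
```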
- -## I have several test suites which share the same test fixture logic, do I have to define a new test fixture class for each of them? This seems pretty tedious. - -You don't have to. Instead of - -```c++ -class FooTest : public BaseTest {}; - -TEST_F(FooTest, Abc) { ... } -TEST_F(FooTest, Def) { ... } - -class BarTest : public BaseTest {}; - -TEST_F(BarTest, Abc) { ... } -TEST_F(BarTest, Def) { ... } -``` - -you can simply `typedef` the test fixtures: - -```c++ -typedef BaseTest FooTest; - -TEST_F(FooTest, Abc) { ... } -TEST_F(FooTest, Def) { ... } - -typedef BaseTest BarTest; - -TEST_F(BarTest, Abc) { ... } -TEST_F(BarTest, Def) { ... } -``` - -## GoogleTest output is buried in a whole bunch of LOG messages. What do I do? - -The GoogleTest output is meant to be a concise and human-friendly report. If -your test generates textual output itself, it will mix with the GoogleTest -output, making it hard to read. However, there is an easy solution to this -problem. - -Since `LOG` messages go to stderr, we decided to let GoogleTest output go to -stdout. This way, you can easily separate the two using redirection. For -example: - -```shell -$ ./my_test > gtest_output.txt -``` - -## Why should I prefer test fixtures over global variables? - -There are several good reasons: - -1. It's likely your test needs to change the states of its global variables. - This makes it difficult to keep side effects from escaping one test and - contaminating others, making debugging difficult. By using fixtures, each - test has a fresh set of variables that's different (but with the same - names). Thus, tests are kept independent of each other. -2. Global variables pollute the global namespace. -3. Test fixtures can be reused via subclassing, which cannot be done easily - with global variables. This is useful if many test suites have something in - common. - -## What can the statement argument in ASSERT_DEATH() be? - -`ASSERT_DEATH(statement, matcher)` (or any death assertion macro) can be used -wherever *`statement`* is valid. So basically *`statement`* can be any C++ -statement that makes sense in the current context. In particular, it can -reference global and/or local variables, and can be: - -* a simple function call (often the case), -* a complex expression, or -* a compound statement. - -Some examples are shown here: - -```c++ -// A death test can be a simple function call. -TEST(MyDeathTest, FunctionCall) { - ASSERT_DEATH(Xyz(5), "Xyz failed"); -} - -// Or a complex expression that references variables and functions. -TEST(MyDeathTest, ComplexExpression) { - const bool c = Condition(); - ASSERT_DEATH((c ? Func1(0) : object2.Method("test")), - "(Func1|Method) failed"); -} - -// Death assertions can be used anywhere in a function. In -// particular, they can be inside a loop. -TEST(MyDeathTest, InsideLoop) { - // Verifies that Foo(0), Foo(1), ..., and Foo(4) all die. - for (int i = 0; i < 5; i++) { - EXPECT_DEATH_M(Foo(i), "Foo has \\d+ errors", - ::testing::Message() << "where i is " << i); - } -} - -// A death assertion can contain a compound statement. -TEST(MyDeathTest, CompoundStatement) { - // Verifies that at lease one of Bar(0), Bar(1), ..., and - // Bar(4) dies. - ASSERT_DEATH({ - for (int i = 0; i < 5; i++) { - Bar(i); - } - }, - "Bar has \\d+ errors"); -} -``` - -## I have a fixture class `FooTest`, but `TEST_F(FooTest, Bar)` gives me error ``"no matching function for call to `FooTest::FooTest()'"``. Why? 
- -GoogleTest needs to be able to create objects of your test fixture class, so it -must have a default constructor. Normally the compiler will define one for you. -However, there are cases where you have to define your own: - -* If you explicitly declare a non-default constructor for class `FooTest` - (`DISALLOW_EVIL_CONSTRUCTORS()` does this), then you need to define a - default constructor, even if it would be empty. -* If `FooTest` has a const non-static data member, then you have to define the - default constructor *and* initialize the const member in the initializer - list of the constructor. (Early versions of `gcc` doesn't force you to - initialize the const member. It's a bug that has been fixed in `gcc 4`.) - -## Why does ASSERT_DEATH complain about previous threads that were already joined? - -With the Linux pthread library, there is no turning back once you cross the line -from a single thread to multiple threads. The first time you create a thread, a -manager thread is created in addition, so you get 3, not 2, threads. Later when -the thread you create joins the main thread, the thread count decrements by 1, -but the manager thread will never be killed, so you still have 2 threads, which -means you cannot safely run a death test. - -The new NPTL thread library doesn't suffer from this problem, as it doesn't -create a manager thread. However, if you don't control which machine your test -runs on, you shouldn't depend on this. - -## Why does GoogleTest require the entire test suite, instead of individual tests, to be named *DeathTest when it uses ASSERT_DEATH? - -GoogleTest does not interleave tests from different test suites. That is, it -runs all tests in one test suite first, and then runs all tests in the next test -suite, and so on. GoogleTest does this because it needs to set up a test suite -before the first test in it is run, and tear it down afterwards. Splitting up -the test case would require multiple set-up and tear-down processes, which is -inefficient and makes the semantics unclean. - -If we were to determine the order of tests based on test name instead of test -case name, then we would have a problem with the following situation: - -```c++ -TEST_F(FooTest, AbcDeathTest) { ... } -TEST_F(FooTest, Uvw) { ... } - -TEST_F(BarTest, DefDeathTest) { ... } -TEST_F(BarTest, Xyz) { ... } -``` - -Since `FooTest.AbcDeathTest` needs to run before `BarTest.Xyz`, and we don't -interleave tests from different test suites, we need to run all tests in the -`FooTest` case before running any test in the `BarTest` case. This contradicts -with the requirement to run `BarTest.DefDeathTest` before `FooTest.Uvw`. - -## But I don't like calling my entire test suite \*DeathTest when it contains both death tests and non-death tests. What do I do? - -You don't have to, but if you like, you may split up the test suite into -`FooTest` and `FooDeathTest`, where the names make it clear that they are -related: - -```c++ -class FooTest : public ::testing::Test { ... }; - -TEST_F(FooTest, Abc) { ... } -TEST_F(FooTest, Def) { ... } - -using FooDeathTest = FooTest; - -TEST_F(FooDeathTest, Uvw) { ... EXPECT_DEATH(...) ... } -TEST_F(FooDeathTest, Xyz) { ... ASSERT_DEATH(...) ... } -``` - -## GoogleTest prints the LOG messages in a death test's child process only when the test fails. How can I see the LOG messages when the death test succeeds? - -Printing the LOG messages generated by the statement inside `EXPECT_DEATH()` -makes it harder to search for real problems in the parent's log. 
Therefore, -GoogleTest only prints them when the death test has failed. - -If you really need to see such LOG messages, a workaround is to temporarily -break the death test (e.g. by changing the regex pattern it is expected to -match). Admittedly, this is a hack. We'll consider a more permanent solution -after the fork-and-exec-style death tests are implemented. - -## The compiler complains about `no match for 'operator<<'` when I use an assertion. What gives? - -If you use a user-defined type `FooType` in an assertion, you must make sure -there is an `std::ostream& operator<<(std::ostream&, const FooType&)` function -defined such that we can print a value of `FooType`. - -In addition, if `FooType` is declared in a name space, the `<<` operator also -needs to be defined in the *same* name space. See -[Tip of the Week #49](http://abseil.io/tips/49) for details. - -## How do I suppress the memory leak messages on Windows? - -Since the statically initialized GoogleTest singleton requires allocations on -the heap, the Visual C++ memory leak detector will report memory leaks at the -end of the program run. The easiest way to avoid this is to use the -`_CrtMemCheckpoint` and `_CrtMemDumpAllObjectsSince` calls to not report any -statically initialized heap objects. See MSDN for more details and additional -heap check/debug routines. - -## How can my code detect if it is running in a test? - -If you write code that sniffs whether it's running in a test and does different -things accordingly, you are leaking test-only logic into production code and -there is no easy way to ensure that the test-only code paths aren't run by -mistake in production. Such cleverness also leads to -[Heisenbugs](https://en.wikipedia.org/wiki/Heisenbug). Therefore we strongly -advise against the practice, and GoogleTest doesn't provide a way to do it. - -In general, the recommended way to cause the code to behave differently under -test is [Dependency Injection](http://en.wikipedia.org/wiki/Dependency_injection). You can inject -different functionality from the test and from the production code. Since your -production code doesn't link in the for-test logic at all (the -[`testonly`](http://docs.bazel.build/versions/master/be/common-definitions.html#common.testonly) attribute for BUILD targets helps to ensure -that), there is no danger in accidentally running it. - -However, if you *really*, *really*, *really* have no choice, and if you follow -the rule of ending your test program names with `_test`, you can use the -*horrible* hack of sniffing your executable name (`argv[0]` in `main()`) to know -whether the code is under test. - -## How do I temporarily disable a test? - -If you have a broken test that you cannot fix right away, you can add the -`DISABLED_` prefix to its name. This will exclude it from execution. This is -better than commenting out the code or using `#if 0`, as disabled tests are -still compiled (and thus won't rot). - -To include disabled tests in test execution, just invoke the test program with -the `--gtest_also_run_disabled_tests` flag. - -## Is it OK if I have two separate `TEST(Foo, Bar)` test methods defined in different namespaces? - -Yes. - -The rule is **all test methods in the same test suite must use the same fixture -class.** This means that the following is **allowed** because both tests use the -same fixture class (`::testing::Test`). 
- -```c++ -namespace foo { -TEST(CoolTest, DoSomething) { - SUCCEED(); -} -} // namespace foo - -namespace bar { -TEST(CoolTest, DoSomething) { - SUCCEED(); -} -} // namespace bar -``` - -However, the following code is **not allowed** and will produce a runtime error -from GoogleTest because the test methods are using different test fixture -classes with the same test suite name. - -```c++ -namespace foo { -class CoolTest : public ::testing::Test {}; // Fixture foo::CoolTest -TEST_F(CoolTest, DoSomething) { - SUCCEED(); -} -} // namespace foo - -namespace bar { -class CoolTest : public ::testing::Test {}; // Fixture: bar::CoolTest -TEST_F(CoolTest, DoSomething) { - SUCCEED(); -} -} // namespace bar -``` diff --git a/3rdparty/googletest-1.13.0/docs/gmock_cheat_sheet.md b/3rdparty/googletest-1.13.0/docs/gmock_cheat_sheet.md deleted file mode 100644 index 2fb0403e616a79a46294a6dd5c8489f1f1dd2a78..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/gmock_cheat_sheet.md +++ /dev/null @@ -1,241 +0,0 @@ -# gMock Cheat Sheet - -## Defining a Mock Class - -### Mocking a Normal Class {#MockClass} - -Given - -```cpp -class Foo { - public: - virtual ~Foo(); - virtual int GetSize() const = 0; - virtual string Describe(const char* name) = 0; - virtual string Describe(int type) = 0; - virtual bool Process(Bar elem, int count) = 0; -}; -``` - -(note that `~Foo()` **must** be virtual) we can define its mock as - -```cpp -#include "gmock/gmock.h" - -class MockFoo : public Foo { - public: - MOCK_METHOD(int, GetSize, (), (const, override)); - MOCK_METHOD(string, Describe, (const char* name), (override)); - MOCK_METHOD(string, Describe, (int type), (override)); - MOCK_METHOD(bool, Process, (Bar elem, int count), (override)); -}; -``` - -To create a "nice" mock, which ignores all uninteresting calls, a "naggy" mock, -which warns on all uninteresting calls, or a "strict" mock, which treats them as -failures: - -```cpp -using ::testing::NiceMock; -using ::testing::NaggyMock; -using ::testing::StrictMock; - -NiceMock nice_foo; // The type is a subclass of MockFoo. -NaggyMock naggy_foo; // The type is a subclass of MockFoo. -StrictMock strict_foo; // The type is a subclass of MockFoo. -``` - -{: .callout .note} -**Note:** A mock object is currently naggy by default. We may make it nice by -default in the future. - -### Mocking a Class Template {#MockTemplate} - -Class templates can be mocked just like any class. - -To mock - -```cpp -template -class StackInterface { - public: - virtual ~StackInterface(); - virtual int GetSize() const = 0; - virtual void Push(const Elem& x) = 0; -}; -``` - -(note that all member functions that are mocked, including `~StackInterface()` -**must** be virtual). - -```cpp -template -class MockStack : public StackInterface { - public: - MOCK_METHOD(int, GetSize, (), (const, override)); - MOCK_METHOD(void, Push, (const Elem& x), (override)); -}; -``` - -### Specifying Calling Conventions for Mock Functions - -If your mock function doesn't use the default calling convention, you can -specify it by adding `Calltype(convention)` to `MOCK_METHOD`'s 4th parameter. -For example, - -```cpp - MOCK_METHOD(bool, Foo, (int n), (Calltype(STDMETHODCALLTYPE))); - MOCK_METHOD(int, Bar, (double x, double y), - (const, Calltype(STDMETHODCALLTYPE))); -``` - -where `STDMETHODCALLTYPE` is defined by `` on Windows. - -## Using Mocks in Tests {#UsingMocks} - -The typical work flow is: - -1. Import the gMock names you need to use. 
All gMock symbols are in the - `testing` namespace unless they are macros or otherwise noted. -2. Create the mock objects. -3. Optionally, set the default actions of the mock objects. -4. Set your expectations on the mock objects (How will they be called? What - will they do?). -5. Exercise code that uses the mock objects; if necessary, check the result - using googletest assertions. -6. When a mock object is destructed, gMock automatically verifies that all - expectations on it have been satisfied. - -Here's an example: - -```cpp -using ::testing::Return; // #1 - -TEST(BarTest, DoesThis) { - MockFoo foo; // #2 - - ON_CALL(foo, GetSize()) // #3 - .WillByDefault(Return(1)); - // ... other default actions ... - - EXPECT_CALL(foo, Describe(5)) // #4 - .Times(3) - .WillRepeatedly(Return("Category 5")); - // ... other expectations ... - - EXPECT_EQ(MyProductionFunction(&foo), "good"); // #5 -} // #6 -``` - -## Setting Default Actions {#OnCall} - -gMock has a **built-in default action** for any function that returns `void`, -`bool`, a numeric value, or a pointer. In C++11, it will additionally returns -the default-constructed value, if one exists for the given type. - -To customize the default action for functions with return type `T`, use -[`DefaultValue`](reference/mocking.md#DefaultValue). For example: - -```cpp - // Sets the default action for return type std::unique_ptr to - // creating a new Buzz every time. - DefaultValue>::SetFactory( - [] { return std::make_unique(AccessLevel::kInternal); }); - - // When this fires, the default action of MakeBuzz() will run, which - // will return a new Buzz object. - EXPECT_CALL(mock_buzzer_, MakeBuzz("hello")).Times(AnyNumber()); - - auto buzz1 = mock_buzzer_.MakeBuzz("hello"); - auto buzz2 = mock_buzzer_.MakeBuzz("hello"); - EXPECT_NE(buzz1, nullptr); - EXPECT_NE(buzz2, nullptr); - EXPECT_NE(buzz1, buzz2); - - // Resets the default action for return type std::unique_ptr, - // to avoid interfere with other tests. - DefaultValue>::Clear(); -``` - -To customize the default action for a particular method of a specific mock -object, use [`ON_CALL`](reference/mocking.md#ON_CALL). `ON_CALL` has a similar -syntax to `EXPECT_CALL`, but it is used for setting default behaviors when you -do not require that the mock method is called. See -[Knowing When to Expect](gmock_cook_book.md#UseOnCall) for a more detailed -discussion. - -## Setting Expectations {#ExpectCall} - -See [`EXPECT_CALL`](reference/mocking.md#EXPECT_CALL) in the Mocking Reference. - -## Matchers {#MatcherList} - -See the [Matchers Reference](reference/matchers.md). - -## Actions {#ActionList} - -See the [Actions Reference](reference/actions.md). - -## Cardinalities {#CardinalityList} - -See the [`Times` clause](reference/mocking.md#EXPECT_CALL.Times) of -`EXPECT_CALL` in the Mocking Reference. - -## Expectation Order - -By default, expectations can be matched in *any* order. If some or all -expectations must be matched in a given order, you can use the -[`After` clause](reference/mocking.md#EXPECT_CALL.After) or -[`InSequence` clause](reference/mocking.md#EXPECT_CALL.InSequence) of -`EXPECT_CALL`, or use an [`InSequence` object](reference/mocking.md#InSequence). - -## Verifying and Resetting a Mock - -gMock will verify the expectations on a mock object when it is destructed, or -you can do it earlier: - -```cpp -using ::testing::Mock; -... -// Verifies and removes the expectations on mock_obj; -// returns true if and only if successful. -Mock::VerifyAndClearExpectations(&mock_obj); -... 
-// Verifies and removes the expectations on mock_obj; -// also removes the default actions set by ON_CALL(); -// returns true if and only if successful. -Mock::VerifyAndClear(&mock_obj); -``` - -Do not set new expectations after verifying and clearing a mock after its use. -Setting expectations after code that exercises the mock has undefined behavior. -See [Using Mocks in Tests](gmock_for_dummies.md#using-mocks-in-tests) for more -information. - -You can also tell gMock that a mock object can be leaked and doesn't need to be -verified: - -```cpp -Mock::AllowLeak(&mock_obj); -``` - -## Mock Classes - -gMock defines a convenient mock class template - -```cpp -class MockFunction { - public: - MOCK_METHOD(R, Call, (A1, ..., An)); -}; -``` - -See this [recipe](gmock_cook_book.md#UsingCheckPoints) for one application of -it. - -## Flags - -| Flag | Description | -| :----------------------------- | :---------------------------------------- | -| `--gmock_catch_leaked_mocks=0` | Don't report leaked mock objects as failures. | -| `--gmock_verbose=LEVEL` | Sets the default verbosity level (`info`, `warning`, or `error`) of Google Mock messages. | diff --git a/3rdparty/googletest-1.13.0/docs/gmock_cook_book.md b/3rdparty/googletest-1.13.0/docs/gmock_cook_book.md deleted file mode 100644 index fc7db35b82c769fea0c34cf875bf6529d58cbaab..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/gmock_cook_book.md +++ /dev/null @@ -1,4343 +0,0 @@ -# gMock Cookbook - -You can find recipes for using gMock here. If you haven't yet, please read -[the dummy guide](gmock_for_dummies.md) first to make sure you understand the -basics. - -{: .callout .note} -**Note:** gMock lives in the `testing` name space. For readability, it is -recommended to write `using ::testing::Foo;` once in your file before using the -name `Foo` defined by gMock. We omit such `using` statements in this section for -brevity, but you should do it in your own code. - -## Creating Mock Classes - -Mock classes are defined as normal classes, using the `MOCK_METHOD` macro to -generate mocked methods. The macro gets 3 or 4 parameters: - -```cpp -class MyMock { - public: - MOCK_METHOD(ReturnType, MethodName, (Args...)); - MOCK_METHOD(ReturnType, MethodName, (Args...), (Specs...)); -}; -``` - -The first 3 parameters are simply the method declaration, split into 3 parts. -The 4th parameter accepts a closed list of qualifiers, which affect the -generated method: - -* **`const`** - Makes the mocked method a `const` method. Required if - overriding a `const` method. -* **`override`** - Marks the method with `override`. Recommended if overriding - a `virtual` method. -* **`noexcept`** - Marks the method with `noexcept`. Required if overriding a - `noexcept` method. -* **`Calltype(...)`** - Sets the call type for the method (e.g. to - `STDMETHODCALLTYPE`), useful in Windows. -* **`ref(...)`** - Marks the method with the reference qualification - specified. Required if overriding a method that has reference - qualifications. Eg `ref(&)` or `ref(&&)`. - -### Dealing with unprotected commas - -Unprotected commas, i.e. commas which are not surrounded by parentheses, prevent -`MOCK_METHOD` from parsing its arguments correctly: - -{: .bad} -```cpp -class MockFoo { - public: - MOCK_METHOD(std::pair, GetPair, ()); // Won't compile! - MOCK_METHOD(bool, CheckMap, (std::map, bool)); // Won't compile! 
-}; -``` - -Solution 1 - wrap with parentheses: - -{: .good} -```cpp -class MockFoo { - public: - MOCK_METHOD((std::pair), GetPair, ()); - MOCK_METHOD(bool, CheckMap, ((std::map), bool)); -}; -``` - -Note that wrapping a return or argument type with parentheses is, in general, -invalid C++. `MOCK_METHOD` removes the parentheses. - -Solution 2 - define an alias: - -{: .good} -```cpp -class MockFoo { - public: - using BoolAndInt = std::pair; - MOCK_METHOD(BoolAndInt, GetPair, ()); - using MapIntDouble = std::map; - MOCK_METHOD(bool, CheckMap, (MapIntDouble, bool)); -}; -``` - -### Mocking Private or Protected Methods - -You must always put a mock method definition (`MOCK_METHOD`) in a `public:` -section of the mock class, regardless of the method being mocked being `public`, -`protected`, or `private` in the base class. This allows `ON_CALL` and -`EXPECT_CALL` to reference the mock function from outside of the mock class. -(Yes, C++ allows a subclass to change the access level of a virtual function in -the base class.) Example: - -```cpp -class Foo { - public: - ... - virtual bool Transform(Gadget* g) = 0; - - protected: - virtual void Resume(); - - private: - virtual int GetTimeOut(); -}; - -class MockFoo : public Foo { - public: - ... - MOCK_METHOD(bool, Transform, (Gadget* g), (override)); - - // The following must be in the public section, even though the - // methods are protected or private in the base class. - MOCK_METHOD(void, Resume, (), (override)); - MOCK_METHOD(int, GetTimeOut, (), (override)); -}; -``` - -### Mocking Overloaded Methods - -You can mock overloaded functions as usual. No special attention is required: - -```cpp -class Foo { - ... - - // Must be virtual as we'll inherit from Foo. - virtual ~Foo(); - - // Overloaded on the types and/or numbers of arguments. - virtual int Add(Element x); - virtual int Add(int times, Element x); - - // Overloaded on the const-ness of this object. - virtual Bar& GetBar(); - virtual const Bar& GetBar() const; -}; - -class MockFoo : public Foo { - ... - MOCK_METHOD(int, Add, (Element x), (override)); - MOCK_METHOD(int, Add, (int times, Element x), (override)); - - MOCK_METHOD(Bar&, GetBar, (), (override)); - MOCK_METHOD(const Bar&, GetBar, (), (const, override)); -}; -``` - -{: .callout .note} -**Note:** if you don't mock all versions of the overloaded method, the compiler -will give you a warning about some methods in the base class being hidden. To -fix that, use `using` to bring them in scope: - -```cpp -class MockFoo : public Foo { - ... - using Foo::Add; - MOCK_METHOD(int, Add, (Element x), (override)); - // We don't want to mock int Add(int times, Element x); - ... -}; -``` - -### Mocking Class Templates - -You can mock class templates just like any class. - -```cpp -template -class StackInterface { - ... - // Must be virtual as we'll inherit from StackInterface. - virtual ~StackInterface(); - - virtual int GetSize() const = 0; - virtual void Push(const Elem& x) = 0; -}; - -template -class MockStack : public StackInterface { - ... - MOCK_METHOD(int, GetSize, (), (override)); - MOCK_METHOD(void, Push, (const Elem& x), (override)); -}; -``` - -### Mocking Non-virtual Methods {#MockingNonVirtualMethods} - -gMock can mock non-virtual functions to be used in Hi-perf dependency injection. - -In this case, instead of sharing a common base class with the real class, your -mock class will be *unrelated* to the real class, but contain methods with the -same signatures. 
The syntax for mocking non-virtual methods is the *same* as -mocking virtual methods (just don't add `override`): - -```cpp -// A simple packet stream class. None of its members is virtual. -class ConcretePacketStream { - public: - void AppendPacket(Packet* new_packet); - const Packet* GetPacket(size_t packet_number) const; - size_t NumberOfPackets() const; - ... -}; - -// A mock packet stream class. It inherits from no other, but defines -// GetPacket() and NumberOfPackets(). -class MockPacketStream { - public: - MOCK_METHOD(const Packet*, GetPacket, (size_t packet_number), (const)); - MOCK_METHOD(size_t, NumberOfPackets, (), (const)); - ... -}; -``` - -Note that the mock class doesn't define `AppendPacket()`, unlike the real class. -That's fine as long as the test doesn't need to call it. - -Next, you need a way to say that you want to use `ConcretePacketStream` in -production code, and use `MockPacketStream` in tests. Since the functions are -not virtual and the two classes are unrelated, you must specify your choice at -*compile time* (as opposed to run time). - -One way to do it is to templatize your code that needs to use a packet stream. -More specifically, you will give your code a template type argument for the type -of the packet stream. In production, you will instantiate your template with -`ConcretePacketStream` as the type argument. In tests, you will instantiate the -same template with `MockPacketStream`. For example, you may write: - -```cpp -template -void CreateConnection(PacketStream* stream) { ... } - -template -class PacketReader { - public: - void ReadPackets(PacketStream* stream, size_t packet_num); -}; -``` - -Then you can use `CreateConnection()` and -`PacketReader` in production code, and use -`CreateConnection()` and `PacketReader` in -tests. - -```cpp - MockPacketStream mock_stream; - EXPECT_CALL(mock_stream, ...)...; - .. set more expectations on mock_stream ... - PacketReader reader(&mock_stream); - ... exercise reader ... -``` - -### Mocking Free Functions - -It is not possible to directly mock a free function (i.e. a C-style function or -a static method). If you need to, you can rewrite your code to use an interface -(abstract class). - -Instead of calling a free function (say, `OpenFile`) directly, introduce an -interface for it and have a concrete subclass that calls the free function: - -```cpp -class FileInterface { - public: - ... - virtual bool Open(const char* path, const char* mode) = 0; -}; - -class File : public FileInterface { - public: - ... - bool Open(const char* path, const char* mode) override { - return OpenFile(path, mode); - } -}; -``` - -Your code should talk to `FileInterface` to open a file. Now it's easy to mock -out the function. - -This may seem like a lot of hassle, but in practice you often have multiple -related functions that you can put in the same interface, so the per-function -syntactic overhead will be much lower. - -If you are concerned about the performance overhead incurred by virtual -functions, and profiling confirms your concern, you can combine this with the -recipe for [mocking non-virtual methods](#MockingNonVirtualMethods). - -### Old-Style `MOCK_METHODn` Macros - -Before the generic `MOCK_METHOD` macro -[was introduced in 2018](https://github.com/google/googletest/commit/c5f08bf91944ce1b19bcf414fa1760e69d20afc2), -mocks where created using a family of macros collectively called `MOCK_METHODn`. -These macros are still supported, though migration to the new `MOCK_METHOD` is -recommended. 
- -The macros in the `MOCK_METHODn` family differ from `MOCK_METHOD`: - -* The general structure is `MOCK_METHODn(MethodName, ReturnType(Args))`, - instead of `MOCK_METHOD(ReturnType, MethodName, (Args))`. -* The number `n` must equal the number of arguments. -* When mocking a const method, one must use `MOCK_CONST_METHODn`. -* When mocking a class template, the macro name must be suffixed with `_T`. -* In order to specify the call type, the macro name must be suffixed with - `_WITH_CALLTYPE`, and the call type is the first macro argument. - -Old macros and their new equivalents: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Method kind | Old | New |
| :--- | :--- | :--- |
| Simple | `MOCK_METHOD1(Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int))` |
| Const Method | `MOCK_CONST_METHOD1(Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int), (const))` |
| Method in a Class Template | `MOCK_METHOD1_T(Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int))` |
| Const Method in a Class Template | `MOCK_CONST_METHOD1_T(Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int), (const))` |
| Method with Call Type | `MOCK_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int), (Calltype(STDMETHODCALLTYPE)))` |
| Const Method with Call Type | `MOCK_CONST_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int), (const, Calltype(STDMETHODCALLTYPE)))` |
| Method with Call Type in a Class Template | `MOCK_METHOD1_T_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int), (Calltype(STDMETHODCALLTYPE)))` |
| Const Method with Call Type in a Class Template | `MOCK_CONST_METHOD1_T_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))` | `MOCK_METHOD(bool, Foo, (int), (const, Calltype(STDMETHODCALLTYPE)))` |
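As a concrete illustration, here is a small mock written with the old macros alongside its `MOCK_METHOD` equivalent. The `Calculator` interface below is made up for this example:

```cpp
#include "gmock/gmock.h"

class Calculator {
 public:
  virtual ~Calculator() = default;
  virtual int Add(int a, int b) = 0;
  virtual int GetValue() const = 0;
};

// Old style: the macro name encodes the argument count and const-ness,
// and the signature is written as ReturnType(Args).
class OldMockCalculator : public Calculator {
 public:
  MOCK_METHOD2(Add, int(int a, int b));
  MOCK_CONST_METHOD0(GetValue, int());
};

// New style: one macro, with qualifiers such as const and override
// passed in the fourth parameter.
class NewMockCalculator : public Calculator {
 public:
  MOCK_METHOD(int, Add, (int a, int b), (override));
  MOCK_METHOD(int, GetValue, (), (const, override));
};
```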
- -### The Nice, the Strict, and the Naggy {#NiceStrictNaggy} - -If a mock method has no `EXPECT_CALL` spec but is called, we say that it's an -"uninteresting call", and the default action (which can be specified using -`ON_CALL()`) of the method will be taken. Currently, an uninteresting call will -also by default cause gMock to print a warning. - -However, sometimes you may want to ignore these uninteresting calls, and -sometimes you may want to treat them as errors. gMock lets you make the decision -on a per-mock-object basis. - -Suppose your test uses a mock class `MockFoo`: - -```cpp -TEST(...) { - MockFoo mock_foo; - EXPECT_CALL(mock_foo, DoThis()); - ... code that uses mock_foo ... -} -``` - -If a method of `mock_foo` other than `DoThis()` is called, you will get a -warning. However, if you rewrite your test to use `NiceMock` instead, -you can suppress the warning: - -```cpp -using ::testing::NiceMock; - -TEST(...) { - NiceMock mock_foo; - EXPECT_CALL(mock_foo, DoThis()); - ... code that uses mock_foo ... -} -``` - -`NiceMock` is a subclass of `MockFoo`, so it can be used wherever -`MockFoo` is accepted. - -It also works if `MockFoo`'s constructor takes some arguments, as -`NiceMock` "inherits" `MockFoo`'s constructors: - -```cpp -using ::testing::NiceMock; - -TEST(...) { - NiceMock mock_foo(5, "hi"); // Calls MockFoo(5, "hi"). - EXPECT_CALL(mock_foo, DoThis()); - ... code that uses mock_foo ... -} -``` - -The usage of `StrictMock` is similar, except that it makes all uninteresting -calls failures: - -```cpp -using ::testing::StrictMock; - -TEST(...) { - StrictMock mock_foo; - EXPECT_CALL(mock_foo, DoThis()); - ... code that uses mock_foo ... - - // The test will fail if a method of mock_foo other than DoThis() - // is called. -} -``` - -{: .callout .note} -NOTE: `NiceMock` and `StrictMock` only affects *uninteresting* calls (calls of -*methods* with no expectations); they do not affect *unexpected* calls (calls of -methods with expectations, but they don't match). See -[Understanding Uninteresting vs Unexpected Calls](#uninteresting-vs-unexpected). - -There are some caveats though (sadly they are side effects of C++'s -limitations): - -1. `NiceMock` and `StrictMock` only work for mock methods - defined using the `MOCK_METHOD` macro **directly** in the `MockFoo` class. - If a mock method is defined in a **base class** of `MockFoo`, the "nice" or - "strict" modifier may not affect it, depending on the compiler. In - particular, nesting `NiceMock` and `StrictMock` (e.g. - `NiceMock >`) is **not** supported. -2. `NiceMock` and `StrictMock` may not work correctly if the - destructor of `MockFoo` is not virtual. We would like to fix this, but it - requires cleaning up existing tests. - -Finally, you should be **very cautious** about when to use naggy or strict -mocks, as they tend to make tests more brittle and harder to maintain. When you -refactor your code without changing its externally visible behavior, ideally you -shouldn't need to update any tests. If your code interacts with a naggy mock, -however, you may start to get spammed with warnings as the result of your -change. Worse, if your code interacts with a strict mock, your tests may start -to fail and you'll be forced to fix them. Our general recommendation is to use -nice mocks (not yet the default) most of the time, use naggy mocks (the current -default) when developing or debugging tests, and use strict mocks only as the -last resort. 
- -### Simplifying the Interface without Breaking Existing Code {#SimplerInterfaces} - -Sometimes a method has a long list of arguments that is mostly uninteresting. -For example: - -```cpp -class LogSink { - public: - ... - virtual void send(LogSeverity severity, const char* full_filename, - const char* base_filename, int line, - const struct tm* tm_time, - const char* message, size_t message_len) = 0; -}; -``` - -This method's argument list is lengthy and hard to work with (the `message` -argument is not even 0-terminated). If we mock it as is, using the mock will be -awkward. If, however, we try to simplify this interface, we'll need to fix all -clients depending on it, which is often infeasible. - -The trick is to redispatch the method in the mock class: - -```cpp -class ScopedMockLog : public LogSink { - public: - ... - void send(LogSeverity severity, const char* full_filename, - const char* base_filename, int line, const tm* tm_time, - const char* message, size_t message_len) override { - // We are only interested in the log severity, full file name, and - // log message. - Log(severity, full_filename, std::string(message, message_len)); - } - - // Implements the mock method: - // - // void Log(LogSeverity severity, - // const string& file_path, - // const string& message); - MOCK_METHOD(void, Log, - (LogSeverity severity, const string& file_path, - const string& message)); -}; -``` - -By defining a new mock method with a trimmed argument list, we make the mock -class more user-friendly. - -This technique may also be applied to make overloaded methods more amenable to -mocking. For example, when overloads have been used to implement default -arguments: - -```cpp -class MockTurtleFactory : public TurtleFactory { - public: - Turtle* MakeTurtle(int length, int weight) override { ... } - Turtle* MakeTurtle(int length, int weight, int speed) override { ... } - - // the above methods delegate to this one: - MOCK_METHOD(Turtle*, DoMakeTurtle, ()); -}; -``` - -This allows tests that don't care which overload was invoked to avoid specifying -argument matchers: - -```cpp -ON_CALL(factory, DoMakeTurtle) - .WillByDefault(Return(MakeMockTurtle())); -``` - -### Alternative to Mocking Concrete Classes - -Often you may find yourself using classes that don't implement interfaces. In -order to test your code that uses such a class (let's call it `Concrete`), you -may be tempted to make the methods of `Concrete` virtual and then mock it. - -Try not to do that. - -Making a non-virtual function virtual is a big decision. It creates an extension -point where subclasses can tweak your class' behavior. This weakens your control -on the class because now it's harder to maintain the class invariants. You -should make a function virtual only when there is a valid reason for a subclass -to override it. - -Mocking concrete classes directly is problematic as it creates a tight coupling -between the class and the tests - any small change in the class may invalidate -your tests and make test maintenance a pain. - -To avoid such problems, many programmers have been practicing "coding to -interfaces": instead of talking to the `Concrete` class, your code would define -an interface and talk to it. Then you implement that interface as an adaptor on -top of `Concrete`. In tests, you can easily mock that interface to observe how -your code is doing. - -This technique incurs some overhead: - -* You pay the cost of virtual function calls (usually not a problem). -* There is more abstraction for the programmers to learn. 
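
In code, the "coding to interfaces" approach described above might look like the following (a minimal sketch; `DataSource`, `ConcreteDataSource`, `MockDataSource`, and the `Fetch()` method of `Concrete` are hypothetical names):

```cpp
// The interface is tailored to what the code under test actually needs.
class DataSource {
 public:
  virtual ~DataSource() = default;
  virtual std::string Read(int id) = 0;
};

// An adaptor implements the interface on top of Concrete.
class ConcreteDataSource : public DataSource {
 public:
  explicit ConcreteDataSource(Concrete* concrete) : concrete_(concrete) {}
  std::string Read(int id) override { return concrete_->Fetch(id); }

 private:
  Concrete* concrete_;
};

// Tests mock the interface rather than Concrete itself.
class MockDataSource : public DataSource {
 public:
  MOCK_METHOD(std::string, Read, (int id), (override));
};
```
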
- -However, it can also bring significant benefits in addition to better -testability: - -* `Concrete`'s API may not fit your problem domain very well, as you may not - be the only client it tries to serve. By designing your own interface, you - have a chance to tailor it to your need - you may add higher-level - functionalities, rename stuff, etc instead of just trimming the class. This - allows you to write your code (user of the interface) in a more natural way, - which means it will be more readable, more maintainable, and you'll be more - productive. -* If `Concrete`'s implementation ever has to change, you don't have to rewrite - everywhere it is used. Instead, you can absorb the change in your - implementation of the interface, and your other code and tests will be - insulated from this change. - -Some people worry that if everyone is practicing this technique, they will end -up writing lots of redundant code. This concern is totally understandable. -However, there are two reasons why it may not be the case: - -* Different projects may need to use `Concrete` in different ways, so the best - interfaces for them will be different. Therefore, each of them will have its - own domain-specific interface on top of `Concrete`, and they will not be the - same code. -* If enough projects want to use the same interface, they can always share it, - just like they have been sharing `Concrete`. You can check in the interface - and the adaptor somewhere near `Concrete` (perhaps in a `contrib` - sub-directory) and let many projects use it. - -You need to weigh the pros and cons carefully for your particular problem, but -I'd like to assure you that the Java community has been practicing this for a -long time and it's a proven effective technique applicable in a wide variety of -situations. :-) - -### Delegating Calls to a Fake {#DelegatingToFake} - -Some times you have a non-trivial fake implementation of an interface. For -example: - -```cpp -class Foo { - public: - virtual ~Foo() {} - virtual char DoThis(int n) = 0; - virtual void DoThat(const char* s, int* p) = 0; -}; - -class FakeFoo : public Foo { - public: - char DoThis(int n) override { - return (n > 0) ? '+' : - (n < 0) ? '-' : '0'; - } - - void DoThat(const char* s, int* p) override { - *p = strlen(s); - } -}; -``` - -Now you want to mock this interface such that you can set expectations on it. -However, you also want to use `FakeFoo` for the default behavior, as duplicating -it in the mock object is, well, a lot of work. - -When you define the mock class using gMock, you can have it delegate its default -action to a fake class you already have, using this pattern: - -```cpp -class MockFoo : public Foo { - public: - // Normal mock method definitions using gMock. - MOCK_METHOD(char, DoThis, (int n), (override)); - MOCK_METHOD(void, DoThat, (const char* s, int* p), (override)); - - // Delegates the default actions of the methods to a FakeFoo object. - // This must be called *before* the custom ON_CALL() statements. - void DelegateToFake() { - ON_CALL(*this, DoThis).WillByDefault([this](int n) { - return fake_.DoThis(n); - }); - ON_CALL(*this, DoThat).WillByDefault([this](const char* s, int* p) { - fake_.DoThat(s, p); - }); - } - - private: - FakeFoo fake_; // Keeps an instance of the fake in the mock. -}; -``` - -With that, you can use `MockFoo` in your tests as usual. 
Just remember that if -you don't explicitly set an action in an `ON_CALL()` or `EXPECT_CALL()`, the -fake will be called upon to do it.: - -```cpp -using ::testing::_; - -TEST(AbcTest, Xyz) { - MockFoo foo; - - foo.DelegateToFake(); // Enables the fake for delegation. - - // Put your ON_CALL(foo, ...)s here, if any. - - // No action specified, meaning to use the default action. - EXPECT_CALL(foo, DoThis(5)); - EXPECT_CALL(foo, DoThat(_, _)); - - int n = 0; - EXPECT_EQ('+', foo.DoThis(5)); // FakeFoo::DoThis() is invoked. - foo.DoThat("Hi", &n); // FakeFoo::DoThat() is invoked. - EXPECT_EQ(2, n); -} -``` - -**Some tips:** - -* If you want, you can still override the default action by providing your own - `ON_CALL()` or using `.WillOnce()` / `.WillRepeatedly()` in `EXPECT_CALL()`. -* In `DelegateToFake()`, you only need to delegate the methods whose fake - implementation you intend to use. - -* The general technique discussed here works for overloaded methods, but - you'll need to tell the compiler which version you mean. To disambiguate a - mock function (the one you specify inside the parentheses of `ON_CALL()`), - use [this technique](#SelectOverload); to disambiguate a fake function (the - one you place inside `Invoke()`), use a `static_cast` to specify the - function's type. For instance, if class `Foo` has methods `char DoThis(int - n)` and `bool DoThis(double x) const`, and you want to invoke the latter, - you need to write `Invoke(&fake_, static_cast(&FakeFoo::DoThis))` instead of `Invoke(&fake_, &FakeFoo::DoThis)` - (The strange-looking thing inside the angled brackets of `static_cast` is - the type of a function pointer to the second `DoThis()` method.). - -* Having to mix a mock and a fake is often a sign of something gone wrong. - Perhaps you haven't got used to the interaction-based way of testing yet. Or - perhaps your interface is taking on too many roles and should be split up. - Therefore, **don't abuse this**. We would only recommend to do it as an - intermediate step when you are refactoring your code. - -Regarding the tip on mixing a mock and a fake, here's an example on why it may -be a bad sign: Suppose you have a class `System` for low-level system -operations. In particular, it does file and I/O operations. And suppose you want -to test how your code uses `System` to do I/O, and you just want the file -operations to work normally. If you mock out the entire `System` class, you'll -have to provide a fake implementation for the file operation part, which -suggests that `System` is taking on too many roles. - -Instead, you can define a `FileOps` interface and an `IOOps` interface and split -`System`'s functionalities into the two. Then you can mock `IOOps` without -mocking `FileOps`. - -### Delegating Calls to a Real Object - -When using testing doubles (mocks, fakes, stubs, and etc), sometimes their -behaviors will differ from those of the real objects. This difference could be -either intentional (as in simulating an error such that you can test the error -handling code) or unintentional. If your mocks have different behaviors than the -real objects by mistake, you could end up with code that passes the tests but -fails in production. - -You can use the *delegating-to-real* technique to ensure that your mock has the -same behavior as the real object while retaining the ability to validate calls. -This technique is very similar to the [delegating-to-fake](#DelegatingToFake) -technique, the difference being that we use a real object instead of a fake. 
-Here's an example: - -```cpp -using ::testing::AtLeast; - -class MockFoo : public Foo { - public: - MockFoo() { - // By default, all calls are delegated to the real object. - ON_CALL(*this, DoThis).WillByDefault([this](int n) { - return real_.DoThis(n); - }); - ON_CALL(*this, DoThat).WillByDefault([this](const char* s, int* p) { - real_.DoThat(s, p); - }); - ... - } - MOCK_METHOD(char, DoThis, ...); - MOCK_METHOD(void, DoThat, ...); - ... - private: - Foo real_; -}; - -... - MockFoo mock; - EXPECT_CALL(mock, DoThis()) - .Times(3); - EXPECT_CALL(mock, DoThat("Hi")) - .Times(AtLeast(1)); - ... use mock in test ... -``` - -With this, gMock will verify that your code made the right calls (with the right -arguments, in the right order, called the right number of times, etc), and a -real object will answer the calls (so the behavior will be the same as in -production). This gives you the best of both worlds. - -### Delegating Calls to a Parent Class - -Ideally, you should code to interfaces, whose methods are all pure virtual. In -reality, sometimes you do need to mock a virtual method that is not pure (i.e, -it already has an implementation). For example: - -```cpp -class Foo { - public: - virtual ~Foo(); - - virtual void Pure(int n) = 0; - virtual int Concrete(const char* str) { ... } -}; - -class MockFoo : public Foo { - public: - // Mocking a pure method. - MOCK_METHOD(void, Pure, (int n), (override)); - // Mocking a concrete method. Foo::Concrete() is shadowed. - MOCK_METHOD(int, Concrete, (const char* str), (override)); -}; -``` - -Sometimes you may want to call `Foo::Concrete()` instead of -`MockFoo::Concrete()`. Perhaps you want to do it as part of a stub action, or -perhaps your test doesn't need to mock `Concrete()` at all (but it would be -oh-so painful to have to define a new mock class whenever you don't need to mock -one of its methods). - -You can call `Foo::Concrete()` inside an action by: - -```cpp -... - EXPECT_CALL(foo, Concrete).WillOnce([&foo](const char* str) { - return foo.Foo::Concrete(str); - }); -``` - -or tell the mock object that you don't want to mock `Concrete()`: - -```cpp -... - ON_CALL(foo, Concrete).WillByDefault([&foo](const char* str) { - return foo.Foo::Concrete(str); - }); -``` - -(Why don't we just write `{ return foo.Concrete(str); }`? If you do that, -`MockFoo::Concrete()` will be called (and cause an infinite recursion) since -`Foo::Concrete()` is virtual. That's just how C++ works.) - -## Using Matchers - -### Matching Argument Values Exactly - -You can specify exactly which arguments a mock method is expecting: - -```cpp -using ::testing::Return; -... - EXPECT_CALL(foo, DoThis(5)) - .WillOnce(Return('a')); - EXPECT_CALL(foo, DoThat("Hello", bar)); -``` - -### Using Simple Matchers - -You can use matchers to match arguments that have a certain property: - -```cpp -using ::testing::NotNull; -using ::testing::Return; -... - EXPECT_CALL(foo, DoThis(Ge(5))) // The argument must be >= 5. - .WillOnce(Return('a')); - EXPECT_CALL(foo, DoThat("Hello", NotNull())); - // The second argument must not be NULL. -``` - -A frequently used matcher is `_`, which matches anything: - -```cpp - EXPECT_CALL(foo, DoThat(_, NotNull())); -``` - -### Combining Matchers {#CombiningMatchers} - -You can build complex matchers from existing ones using `AllOf()`, -`AllOfArray()`, `AnyOf()`, `AnyOfArray()` and `Not()`: - -```cpp -using ::testing::AllOf; -using ::testing::Gt; -using ::testing::HasSubstr; -using ::testing::Ne; -using ::testing::Not; -... 
- // The argument must be > 5 and != 10. - EXPECT_CALL(foo, DoThis(AllOf(Gt(5), - Ne(10)))); - - // The first argument must not contain sub-string "blah". - EXPECT_CALL(foo, DoThat(Not(HasSubstr("blah")), - NULL)); -``` - -Matchers are function objects, and parametrized matchers can be composed just -like any other function. However because their types can be long and rarely -provide meaningful information, it can be easier to express them with C++14 -generic lambdas to avoid specifying types. For example, - -```cpp -using ::testing::Contains; -using ::testing::Property; - -inline constexpr auto HasFoo = [](const auto& f) { - return Property("foo", &MyClass::foo, Contains(f)); -}; -... - EXPECT_THAT(x, HasFoo("blah")); -``` - -### Casting Matchers {#SafeMatcherCast} - -gMock matchers are statically typed, meaning that the compiler can catch your -mistake if you use a matcher of the wrong type (for example, if you use `Eq(5)` -to match a `string` argument). Good for you! - -Sometimes, however, you know what you're doing and want the compiler to give you -some slack. One example is that you have a matcher for `long` and the argument -you want to match is `int`. While the two types aren't exactly the same, there -is nothing really wrong with using a `Matcher` to match an `int` - after -all, we can first convert the `int` argument to a `long` losslessly before -giving it to the matcher. - -To support this need, gMock gives you the `SafeMatcherCast(m)` function. It -casts a matcher `m` to type `Matcher`. To ensure safety, gMock checks that -(let `U` be the type `m` accepts : - -1. Type `T` can be *implicitly* cast to type `U`; -2. When both `T` and `U` are built-in arithmetic types (`bool`, integers, and - floating-point numbers), the conversion from `T` to `U` is not lossy (in - other words, any value representable by `T` can also be represented by `U`); - and -3. When `U` is a reference, `T` must also be a reference (as the underlying - matcher may be interested in the address of the `U` value). - -The code won't compile if any of these conditions isn't met. - -Here's one example: - -```cpp -using ::testing::SafeMatcherCast; - -// A base class and a child class. -class Base { ... }; -class Derived : public Base { ... }; - -class MockFoo : public Foo { - public: - MOCK_METHOD(void, DoThis, (Derived* derived), (override)); -}; - -... - MockFoo foo; - // m is a Matcher we got from somewhere. - EXPECT_CALL(foo, DoThis(SafeMatcherCast(m))); -``` - -If you find `SafeMatcherCast(m)` too limiting, you can use a similar function -`MatcherCast(m)`. The difference is that `MatcherCast` works as long as you -can `static_cast` type `T` to type `U`. - -`MatcherCast` essentially lets you bypass C++'s type system (`static_cast` isn't -always safe as it could throw away information, for example), so be careful not -to misuse/abuse it. - -### Selecting Between Overloaded Functions {#SelectOverload} - -If you expect an overloaded function to be called, the compiler may need some -help on which overloaded version it is. - -To disambiguate functions overloaded on the const-ness of this object, use the -`Const()` argument wrapper. - -```cpp -using ::testing::ReturnRef; - -class MockFoo : public Foo { - ... - MOCK_METHOD(Bar&, GetBar, (), (override)); - MOCK_METHOD(const Bar&, GetBar, (), (const, override)); -}; - -... - MockFoo foo; - Bar bar1, bar2; - EXPECT_CALL(foo, GetBar()) // The non-const GetBar(). - .WillOnce(ReturnRef(bar1)); - EXPECT_CALL(Const(foo), GetBar()) // The const GetBar(). 
- .WillOnce(ReturnRef(bar2)); -``` - -(`Const()` is defined by gMock and returns a `const` reference to its argument.) - -To disambiguate overloaded functions with the same number of arguments but -different argument types, you may need to specify the exact type of a matcher, -either by wrapping your matcher in `Matcher()`, or using a matcher whose -type is fixed (`TypedEq`, `An()`, etc): - -```cpp -using ::testing::An; -using ::testing::Matcher; -using ::testing::TypedEq; - -class MockPrinter : public Printer { - public: - MOCK_METHOD(void, Print, (int n), (override)); - MOCK_METHOD(void, Print, (char c), (override)); -}; - -TEST(PrinterTest, Print) { - MockPrinter printer; - - EXPECT_CALL(printer, Print(An())); // void Print(int); - EXPECT_CALL(printer, Print(Matcher(Lt(5)))); // void Print(int); - EXPECT_CALL(printer, Print(TypedEq('a'))); // void Print(char); - - printer.Print(3); - printer.Print(6); - printer.Print('a'); -} -``` - -### Performing Different Actions Based on the Arguments - -When a mock method is called, the *last* matching expectation that's still -active will be selected (think "newer overrides older"). So, you can make a -method do different things depending on its argument values like this: - -```cpp -using ::testing::_; -using ::testing::Lt; -using ::testing::Return; -... - // The default case. - EXPECT_CALL(foo, DoThis(_)) - .WillRepeatedly(Return('b')); - // The more specific case. - EXPECT_CALL(foo, DoThis(Lt(5))) - .WillRepeatedly(Return('a')); -``` - -Now, if `foo.DoThis()` is called with a value less than 5, `'a'` will be -returned; otherwise `'b'` will be returned. - -### Matching Multiple Arguments as a Whole - -Sometimes it's not enough to match the arguments individually. For example, we -may want to say that the first argument must be less than the second argument. -The `With()` clause allows us to match all arguments of a mock function as a -whole. For example, - -```cpp -using ::testing::_; -using ::testing::Ne; -using ::testing::Lt; -... - EXPECT_CALL(foo, InRange(Ne(0), _)) - .With(Lt()); -``` - -says that the first argument of `InRange()` must not be 0, and must be less than -the second argument. - -The expression inside `With()` must be a matcher of type `Matcher>`, where `A1`, ..., `An` are the types of the function arguments. - -You can also write `AllArgs(m)` instead of `m` inside `.With()`. The two forms -are equivalent, but `.With(AllArgs(Lt()))` is more readable than `.With(Lt())`. - -You can use `Args(m)` to match the `n` selected arguments (as a -tuple) against `m`. For example, - -```cpp -using ::testing::_; -using ::testing::AllOf; -using ::testing::Args; -using ::testing::Lt; -... - EXPECT_CALL(foo, Blah) - .With(AllOf(Args<0, 1>(Lt()), Args<1, 2>(Lt()))); -``` - -says that `Blah` will be called with arguments `x`, `y`, and `z` where `x < y < -z`. Note that in this example, it wasn't necessary to specify the positional -matchers. - -As a convenience and example, gMock provides some matchers for 2-tuples, -including the `Lt()` matcher above. See -[Multi-argument Matchers](reference/matchers.md#MultiArgMatchers) for the -complete list. - -Note that if you want to pass the arguments to a predicate of your own (e.g. -`.With(Args<0, 1>(Truly(&MyPredicate)))`), that predicate MUST be written to -take a `std::tuple` as its argument; gMock will pass the `n` selected arguments -as *one* single tuple to the predicate. 
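
For example, a hand-written predicate used with `Args<0, 1>()` receives the two selected arguments packed into one `std::tuple` (a minimal sketch; `Blah` is assumed to be the multi-argument mock method from above, and `FirstIsSmaller` is a hypothetical helper):

```cpp
using ::testing::Args;
using ::testing::Truly;

// The selected arguments arrive as a single tuple, not as separate parameters.
bool FirstIsSmaller(const std::tuple<int, int>& args) {
  return std::get<0>(args) < std::get<1>(args);
}
...
  EXPECT_CALL(foo, Blah)
      .With(Args<0, 1>(Truly(FirstIsSmaller)));
```
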
- -### Using Matchers as Predicates - -Have you noticed that a matcher is just a fancy predicate that also knows how to -describe itself? Many existing algorithms take predicates as arguments (e.g. -those defined in STL's `` header), and it would be a shame if gMock -matchers were not allowed to participate. - -Luckily, you can use a matcher where a unary predicate functor is expected by -wrapping it inside the `Matches()` function. For example, - -```cpp -#include -#include - -using ::testing::Matches; -using ::testing::Ge; - -vector v; -... -// How many elements in v are >= 10? -const int count = count_if(v.begin(), v.end(), Matches(Ge(10))); -``` - -Since you can build complex matchers from simpler ones easily using gMock, this -gives you a way to conveniently construct composite predicates (doing the same -using STL's `` header is just painful). For example, here's a -predicate that's satisfied by any number that is >= 0, <= 100, and != 50: - -```cpp -using testing::AllOf; -using testing::Ge; -using testing::Le; -using testing::Matches; -using testing::Ne; -... -Matches(AllOf(Ge(0), Le(100), Ne(50))) -``` - -### Using Matchers in googletest Assertions - -See [`EXPECT_THAT`](reference/assertions.md#EXPECT_THAT) in the Assertions -Reference. - -### Using Predicates as Matchers - -gMock provides a set of built-in matchers for matching arguments with expected -valuesβ€”see the [Matchers Reference](reference/matchers.md) for more information. -In case you find the built-in set lacking, you can use an arbitrary unary -predicate function or functor as a matcher - as long as the predicate accepts a -value of the type you want. You do this by wrapping the predicate inside the -`Truly()` function, for example: - -```cpp -using ::testing::Truly; - -int IsEven(int n) { return (n % 2) == 0 ? 1 : 0; } -... - // Bar() must be called with an even number. - EXPECT_CALL(foo, Bar(Truly(IsEven))); -``` - -Note that the predicate function / functor doesn't have to return `bool`. It -works as long as the return value can be used as the condition in the statement -`if (condition) ...`. - -### Matching Arguments that Are Not Copyable - -When you do an `EXPECT_CALL(mock_obj, Foo(bar))`, gMock saves away a copy of -`bar`. When `Foo()` is called later, gMock compares the argument to `Foo()` with -the saved copy of `bar`. This way, you don't need to worry about `bar` being -modified or destroyed after the `EXPECT_CALL()` is executed. The same is true -when you use matchers like `Eq(bar)`, `Le(bar)`, and so on. - -But what if `bar` cannot be copied (i.e. has no copy constructor)? You could -define your own matcher function or callback and use it with `Truly()`, as the -previous couple of recipes have shown. Or, you may be able to get away from it -if you can guarantee that `bar` won't be changed after the `EXPECT_CALL()` is -executed. Just tell gMock that it should save a reference to `bar`, instead of a -copy of it. Here's how: - -```cpp -using ::testing::Eq; -using ::testing::Lt; -... - // Expects that Foo()'s argument == bar. - EXPECT_CALL(mock_obj, Foo(Eq(std::ref(bar)))); - - // Expects that Foo()'s argument < bar. - EXPECT_CALL(mock_obj, Foo(Lt(std::ref(bar)))); -``` - -Remember: if you do this, don't change `bar` after the `EXPECT_CALL()`, or the -result is undefined. - -### Validating a Member of an Object - -Often a mock function takes a reference to object as an argument. When matching -the argument, you may not want to compare the entire object against a fixed -object, as that may be over-specification. 
Instead, you may need to validate a -certain member variable or the result of a certain getter method of the object. -You can do this with `Field()` and `Property()`. More specifically, - -```cpp -Field(&Foo::bar, m) -``` - -is a matcher that matches a `Foo` object whose `bar` member variable satisfies -matcher `m`. - -```cpp -Property(&Foo::baz, m) -``` - -is a matcher that matches a `Foo` object whose `baz()` method returns a value -that satisfies matcher `m`. - -For example: - -| Expression | Description | -| :--------------------------- | :--------------------------------------- | -| `Field(&Foo::number, Ge(3))` | Matches `x` where `x.number >= 3`. | -| `Property(&Foo::name, StartsWith("John "))` | Matches `x` where `x.name()` starts with `"John "`. | - -Note that in `Property(&Foo::baz, ...)`, method `baz()` must take no argument -and be declared as `const`. Don't use `Property()` against member functions that -you do not own, because taking addresses of functions is fragile and generally -not part of the contract of the function. - -`Field()` and `Property()` can also match plain pointers to objects. For -instance, - -```cpp -using ::testing::Field; -using ::testing::Ge; -... -Field(&Foo::number, Ge(3)) -``` - -matches a plain pointer `p` where `p->number >= 3`. If `p` is `NULL`, the match -will always fail regardless of the inner matcher. - -What if you want to validate more than one members at the same time? Remember -that there are [`AllOf()` and `AllOfArray()`](#CombiningMatchers). - -Finally `Field()` and `Property()` provide overloads that take the field or -property names as the first argument to include it in the error message. This -can be useful when creating combined matchers. - -```cpp -using ::testing::AllOf; -using ::testing::Field; -using ::testing::Matcher; -using ::testing::SafeMatcherCast; - -Matcher IsFoo(const Foo& foo) { - return AllOf(Field("some_field", &Foo::some_field, foo.some_field), - Field("other_field", &Foo::other_field, foo.other_field), - Field("last_field", &Foo::last_field, foo.last_field)); -} -``` - -### Validating the Value Pointed to by a Pointer Argument - -C++ functions often take pointers as arguments. You can use matchers like -`IsNull()`, `NotNull()`, and other comparison matchers to match a pointer, but -what if you want to make sure the value *pointed to* by the pointer, instead of -the pointer itself, has a certain property? Well, you can use the `Pointee(m)` -matcher. - -`Pointee(m)` matches a pointer if and only if `m` matches the value the pointer -points to. For example: - -```cpp -using ::testing::Ge; -using ::testing::Pointee; -... - EXPECT_CALL(foo, Bar(Pointee(Ge(3)))); -``` - -expects `foo.Bar()` to be called with a pointer that points to a value greater -than or equal to 3. - -One nice thing about `Pointee()` is that it treats a `NULL` pointer as a match -failure, so you can write `Pointee(m)` instead of - -```cpp -using ::testing::AllOf; -using ::testing::NotNull; -using ::testing::Pointee; -... - AllOf(NotNull(), Pointee(m)) -``` - -without worrying that a `NULL` pointer will crash your test. - -Also, did we tell you that `Pointee()` works with both raw pointers **and** -smart pointers (`std::unique_ptr`, `std::shared_ptr`, etc)? - -What if you have a pointer to pointer? You guessed it - you can use nested -`Pointee()` to probe deeper inside the value. For example, -`Pointee(Pointee(Lt(3)))` matches a pointer that points to a pointer that points -to a number less than 3 (what a mouthful...). 
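
Here is a short sketch of both points, using `EXPECT_THAT` directly on the pointers:

```cpp
using ::testing::Lt;
using ::testing::Pointee;
...
  auto smart = std::make_unique<int>(2);
  EXPECT_THAT(smart, Pointee(Lt(3)));  // Smart pointers work too.

  int n = 2;
  int* p = &n;
  int** pp = &p;
  EXPECT_THAT(pp, Pointee(Pointee(Lt(3))));  // Pointer to pointer.
```
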
- -### Defining a Custom Matcher Class {#CustomMatcherClass} - -Most matchers can be simply defined using [the MATCHER* macros](#NewMatchers), -which are terse and flexible, and produce good error messages. However, these -macros are not very explicit about the interfaces they create and are not always -suitable, especially for matchers that will be widely reused. - -For more advanced cases, you may need to define your own matcher class. A custom -matcher allows you to test a specific invariant property of that object. Let's -take a look at how to do so. - -Imagine you have a mock function that takes an object of type `Foo`, which has -an `int bar()` method and an `int baz()` method. You want to constrain that the -argument's `bar()` value plus its `baz()` value is a given number. (This is an -invariant.) Here's how we can write and use a matcher class to do so: - -```cpp -class BarPlusBazEqMatcher { - public: - using is_gtest_matcher = void; - - explicit BarPlusBazEqMatcher(int expected_sum) - : expected_sum_(expected_sum) {} - - bool MatchAndExplain(const Foo& foo, - std::ostream* /* listener */) const { - return (foo.bar() + foo.baz()) == expected_sum_; - } - - void DescribeTo(std::ostream* os) const { - *os << "bar() + baz() equals " << expected_sum_; - } - - void DescribeNegationTo(std::ostream* os) const { - *os << "bar() + baz() does not equal " << expected_sum_; - } - private: - const int expected_sum_; -}; - -::testing::Matcher BarPlusBazEq(int expected_sum) { - return BarPlusBazEqMatcher(expected_sum); -} - -... - Foo foo; - EXPECT_THAT(foo, BarPlusBazEq(5))...; -``` - -### Matching Containers - -Sometimes an STL container (e.g. list, vector, map, ...) is passed to a mock -function and you may want to validate it. Since most STL containers support the -`==` operator, you can write `Eq(expected_container)` or simply -`expected_container` to match a container exactly. - -Sometimes, though, you may want to be more flexible (for example, the first -element must be an exact match, but the second element can be any positive -number, and so on). Also, containers used in tests often have a small number of -elements, and having to define the expected container out-of-line is a bit of a -hassle. - -You can use the `ElementsAre()` or `UnorderedElementsAre()` matcher in such -cases: - -```cpp -using ::testing::_; -using ::testing::ElementsAre; -using ::testing::Gt; -... - MOCK_METHOD(void, Foo, (const vector& numbers), (override)); -... - EXPECT_CALL(mock, Foo(ElementsAre(1, Gt(0), _, 5))); -``` - -The above matcher says that the container must have 4 elements, which must be 1, -greater than 0, anything, and 5 respectively. - -If you instead write: - -```cpp -using ::testing::_; -using ::testing::Gt; -using ::testing::UnorderedElementsAre; -... - MOCK_METHOD(void, Foo, (const vector& numbers), (override)); -... - EXPECT_CALL(mock, Foo(UnorderedElementsAre(1, Gt(0), _, 5))); -``` - -It means that the container must have 4 elements, which (under some permutation) -must be 1, greater than 0, anything, and 5 respectively. - -As an alternative you can place the arguments in a C-style array and use -`ElementsAreArray()` or `UnorderedElementsAreArray()` instead: - -```cpp -using ::testing::ElementsAreArray; -... - // ElementsAreArray accepts an array of element values. - const int expected_vector1[] = {1, 5, 2, 4, ...}; - EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector1))); - - // Or, an array of element matchers. 
- Matcher expected_vector2[] = {1, Gt(2), _, 3, ...}; - EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector2))); -``` - -In case the array needs to be dynamically created (and therefore the array size -cannot be inferred by the compiler), you can give `ElementsAreArray()` an -additional argument to specify the array size: - -```cpp -using ::testing::ElementsAreArray; -... - int* const expected_vector3 = new int[count]; - ... fill expected_vector3 with values ... - EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector3, count))); -``` - -Use `Pair` when comparing maps or other associative containers. - -{% raw %} - -```cpp -using ::testing::UnorderedElementsAre; -using ::testing::Pair; -... - absl::flat_hash_map m = {{"a", 1}, {"b", 2}, {"c", 3}}; - EXPECT_THAT(m, UnorderedElementsAre( - Pair("a", 1), Pair("b", 2), Pair("c", 3))); -``` - -{% endraw %} - -**Tips:** - -* `ElementsAre*()` can be used to match *any* container that implements the - STL iterator pattern (i.e. it has a `const_iterator` type and supports - `begin()/end()`), not just the ones defined in STL. It will even work with - container types yet to be written - as long as they follows the above - pattern. -* You can use nested `ElementsAre*()` to match nested (multi-dimensional) - containers. -* If the container is passed by pointer instead of by reference, just write - `Pointee(ElementsAre*(...))`. -* The order of elements *matters* for `ElementsAre*()`. If you are using it - with containers whose element order are undefined (such as a - `std::unordered_map`) you should use `UnorderedElementsAre`. - -### Sharing Matchers - -Under the hood, a gMock matcher object consists of a pointer to a ref-counted -implementation object. Copying matchers is allowed and very efficient, as only -the pointer is copied. When the last matcher that references the implementation -object dies, the implementation object will be deleted. - -Therefore, if you have some complex matcher that you want to use again and -again, there is no need to build it every time. Just assign it to a matcher -variable and use that variable repeatedly! For example, - -```cpp -using ::testing::AllOf; -using ::testing::Gt; -using ::testing::Le; -using ::testing::Matcher; -... - Matcher in_range = AllOf(Gt(5), Le(10)); - ... use in_range as a matcher in multiple EXPECT_CALLs ... -``` - -### Matchers must have no side-effects {#PureMatchers} - -{: .callout .warning} -WARNING: gMock does not guarantee when or how many times a matcher will be -invoked. Therefore, all matchers must be *purely functional*: they cannot have -any side effects, and the match result must not depend on anything other than -the matcher's parameters and the value being matched. - -This requirement must be satisfied no matter how a matcher is defined (e.g., if -it is one of the standard matchers, or a custom matcher). In particular, a -matcher can never call a mock function, as that will affect the state of the -mock object and gMock. - -## Setting Expectations - -### Knowing When to Expect {#UseOnCall} - -**`ON_CALL`** is likely the *single most under-utilized construct* in gMock. - -There are basically two constructs for defining the behavior of a mock object: -`ON_CALL` and `EXPECT_CALL`. The difference? `ON_CALL` defines what happens when -a mock method is called, but doesn't imply any expectation on the method -being called. 
`EXPECT_CALL` not only defines the behavior, but also sets an -expectation that the method will be called with the given arguments, for the -given number of times (and *in the given order* when you specify the order -too). - -Since `EXPECT_CALL` does more, isn't it better than `ON_CALL`? Not really. Every -`EXPECT_CALL` adds a constraint on the behavior of the code under test. Having -more constraints than necessary is *baaad* - even worse than not having enough -constraints. - -This may be counter-intuitive. How could tests that verify more be worse than -tests that verify less? Isn't verification the whole point of tests? - -The answer lies in *what* a test should verify. **A good test verifies the -contract of the code.** If a test over-specifies, it doesn't leave enough -freedom to the implementation. As a result, changing the implementation without -breaking the contract (e.g. refactoring and optimization), which should be -perfectly fine to do, can break such tests. Then you have to spend time fixing -them, only to see them broken again the next time the implementation is changed. - -Keep in mind that one doesn't have to verify more than one property in one test. -In fact, **it's a good style to verify only one thing in one test.** If you do -that, a bug will likely break only one or two tests instead of dozens (which -case would you rather debug?). If you are also in the habit of giving tests -descriptive names that tell what they verify, you can often easily guess what's -wrong just from the test log itself. - -So use `ON_CALL` by default, and only use `EXPECT_CALL` when you actually intend -to verify that the call is made. For example, you may have a bunch of `ON_CALL`s -in your test fixture to set the common mock behavior shared by all tests in the -same group, and write (scarcely) different `EXPECT_CALL`s in different `TEST_F`s -to verify different aspects of the code's behavior. Compared with the style -where each `TEST` has many `EXPECT_CALL`s, this leads to tests that are more -resilient to implementational changes (and thus less likely to require -maintenance) and makes the intent of the tests more obvious (so they are easier -to maintain when you do need to maintain them). - -If you are bothered by the "Uninteresting mock function call" message printed -when a mock method without an `EXPECT_CALL` is called, you may use a `NiceMock` -instead to suppress all such messages for the mock object, or suppress the -message for specific methods by adding `EXPECT_CALL(...).Times(AnyNumber())`. DO -NOT suppress it by blindly adding an `EXPECT_CALL(...)`, or you'll have a test -that's a pain to maintain. - -### Ignoring Uninteresting Calls - -If you are not interested in how a mock method is called, just don't say -anything about it. In this case, if the method is ever called, gMock will -perform its default action to allow the test program to continue. If you are not -happy with the default action taken by gMock, you can override it using -`DefaultValue::Set()` (described [here](#DefaultValue)) or `ON_CALL()`. - -Please note that once you expressed interest in a particular mock method (via -`EXPECT_CALL()`), all invocations to it must match some expectation. If this -function is called but the arguments don't match any `EXPECT_CALL()` statement, -it will be an error. - -### Disallowing Unexpected Calls - -If a mock method shouldn't be called at all, explicitly say so: - -```cpp -using ::testing::_; -... 
- EXPECT_CALL(foo, Bar(_)) - .Times(0); -``` - -If some calls to the method are allowed, but the rest are not, just list all the -expected calls: - -```cpp -using ::testing::AnyNumber; -using ::testing::Gt; -... - EXPECT_CALL(foo, Bar(5)); - EXPECT_CALL(foo, Bar(Gt(10))) - .Times(AnyNumber()); -``` - -A call to `foo.Bar()` that doesn't match any of the `EXPECT_CALL()` statements -will be an error. - -### Understanding Uninteresting vs Unexpected Calls {#uninteresting-vs-unexpected} - -*Uninteresting* calls and *unexpected* calls are different concepts in gMock. -*Very* different. - -A call `x.Y(...)` is **uninteresting** if there's *not even a single* -`EXPECT_CALL(x, Y(...))` set. In other words, the test isn't interested in the -`x.Y()` method at all, as evident in that the test doesn't care to say anything -about it. - -A call `x.Y(...)` is **unexpected** if there are *some* `EXPECT_CALL(x, -Y(...))`s set, but none of them matches the call. Put another way, the test is -interested in the `x.Y()` method (therefore it explicitly sets some -`EXPECT_CALL` to verify how it's called); however, the verification fails as the -test doesn't expect this particular call to happen. - -**An unexpected call is always an error,** as the code under test doesn't behave -the way the test expects it to behave. - -**By default, an uninteresting call is not an error,** as it violates no -constraint specified by the test. (gMock's philosophy is that saying nothing -means there is no constraint.) However, it leads to a warning, as it *might* -indicate a problem (e.g. the test author might have forgotten to specify a -constraint). - -In gMock, `NiceMock` and `StrictMock` can be used to make a mock class "nice" or -"strict". How does this affect uninteresting calls and unexpected calls? - -A **nice mock** suppresses uninteresting call *warnings*. It is less chatty than -the default mock, but otherwise is the same. If a test fails with a default -mock, it will also fail using a nice mock instead. And vice versa. Don't expect -making a mock nice to change the test's result. - -A **strict mock** turns uninteresting call warnings into errors. So making a -mock strict may change the test's result. - -Let's look at an example: - -```cpp -TEST(...) { - NiceMock mock_registry; - EXPECT_CALL(mock_registry, GetDomainOwner("google.com")) - .WillRepeatedly(Return("Larry Page")); - - // Use mock_registry in code under test. - ... &mock_registry ... -} -``` - -The sole `EXPECT_CALL` here says that all calls to `GetDomainOwner()` must have -`"google.com"` as the argument. If `GetDomainOwner("yahoo.com")` is called, it -will be an unexpected call, and thus an error. *Having a nice mock doesn't -change the severity of an unexpected call.* - -So how do we tell gMock that `GetDomainOwner()` can be called with some other -arguments as well? The standard technique is to add a "catch all" `EXPECT_CALL`: - -```cpp - EXPECT_CALL(mock_registry, GetDomainOwner(_)) - .Times(AnyNumber()); // catches all other calls to this method. - EXPECT_CALL(mock_registry, GetDomainOwner("google.com")) - .WillRepeatedly(Return("Larry Page")); -``` - -Remember that `_` is the wildcard matcher that matches anything. With this, if -`GetDomainOwner("google.com")` is called, it will do what the second -`EXPECT_CALL` says; if it is called with a different argument, it will do what -the first `EXPECT_CALL` says. - -Note that the order of the two `EXPECT_CALL`s is important, as a newer -`EXPECT_CALL` takes precedence over an older one. 
- -For more on uninteresting calls, nice mocks, and strict mocks, read -["The Nice, the Strict, and the Naggy"](#NiceStrictNaggy). - -### Ignoring Uninteresting Arguments {#ParameterlessExpectations} - -If your test doesn't care about the parameters (it only cares about the number -or order of calls), you can often simply omit the parameter list: - -```cpp - // Expect foo.Bar( ... ) twice with any arguments. - EXPECT_CALL(foo, Bar).Times(2); - - // Delegate to the given method whenever the factory is invoked. - ON_CALL(foo_factory, MakeFoo) - .WillByDefault(&BuildFooForTest); -``` - -This functionality is only available when a method is not overloaded; to prevent -unexpected behavior it is a compilation error to try to set an expectation on a -method where the specific overload is ambiguous. You can work around this by -supplying a [simpler mock interface](#SimplerInterfaces) than the mocked class -provides. - -This pattern is also useful when the arguments are interesting, but match logic -is substantially complex. You can leave the argument list unspecified and use -SaveArg actions to [save the values for later verification](#SaveArgVerify). If -you do that, you can easily differentiate calling the method the wrong number of -times from calling it with the wrong arguments. - -### Expecting Ordered Calls {#OrderedCalls} - -Although an `EXPECT_CALL()` statement defined later takes precedence when gMock -tries to match a function call with an expectation, by default calls don't have -to happen in the order `EXPECT_CALL()` statements are written. For example, if -the arguments match the matchers in the second `EXPECT_CALL()`, but not those in -the first and third, then the second expectation will be used. - -If you would rather have all calls occur in the order of the expectations, put -the `EXPECT_CALL()` statements in a block where you define a variable of type -`InSequence`: - -```cpp -using ::testing::_; -using ::testing::InSequence; - - { - InSequence s; - - EXPECT_CALL(foo, DoThis(5)); - EXPECT_CALL(bar, DoThat(_)) - .Times(2); - EXPECT_CALL(foo, DoThis(6)); - } -``` - -In this example, we expect a call to `foo.DoThis(5)`, followed by two calls to -`bar.DoThat()` where the argument can be anything, which are in turn followed by -a call to `foo.DoThis(6)`. If a call occurred out-of-order, gMock will report an -error. - -### Expecting Partially Ordered Calls {#PartialOrder} - -Sometimes requiring everything to occur in a predetermined order can lead to -brittle tests. For example, we may care about `A` occurring before both `B` and -`C`, but aren't interested in the relative order of `B` and `C`. In this case, -the test should reflect our real intent, instead of being overly constraining. - -gMock allows you to impose an arbitrary DAG (directed acyclic graph) on the -calls. One way to express the DAG is to use the -[`After` clause](reference/mocking.md#EXPECT_CALL.After) of `EXPECT_CALL`. - -Another way is via the `InSequence()` clause (not the same as the `InSequence` -class), which we borrowed from jMock 2. It's less flexible than `After()`, but -more convenient when you have long chains of sequential calls, as it doesn't -require you to come up with different names for the expectations in the chains. -Here's how it works: - -If we view `EXPECT_CALL()` statements as nodes in a graph, and add an edge from -node A to node B wherever A must occur before B, we can get a DAG. We use the -term "sequence" to mean a directed path in this DAG. 
Now, if we decompose the -DAG into sequences, we just need to know which sequences each `EXPECT_CALL()` -belongs to in order to be able to reconstruct the original DAG. - -So, to specify the partial order on the expectations we need to do two things: -first to define some `Sequence` objects, and then for each `EXPECT_CALL()` say -which `Sequence` objects it is part of. - -Expectations in the same sequence must occur in the order they are written. For -example, - -```cpp -using ::testing::Sequence; -... - Sequence s1, s2; - - EXPECT_CALL(foo, A()) - .InSequence(s1, s2); - EXPECT_CALL(bar, B()) - .InSequence(s1); - EXPECT_CALL(bar, C()) - .InSequence(s2); - EXPECT_CALL(foo, D()) - .InSequence(s2); -``` - -specifies the following DAG (where `s1` is `A -> B`, and `s2` is `A -> C -> D`): - -```text - +---> B - | - A ---| - | - +---> C ---> D -``` - -This means that A must occur before B and C, and C must occur before D. There's -no restriction about the order other than these. - -### Controlling When an Expectation Retires - -When a mock method is called, gMock only considers expectations that are still -active. An expectation is active when created, and becomes inactive (aka -*retires*) when a call that has to occur later has occurred. For example, in - -```cpp -using ::testing::_; -using ::testing::Sequence; -... - Sequence s1, s2; - - EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #1 - .Times(AnyNumber()) - .InSequence(s1, s2); - EXPECT_CALL(log, Log(WARNING, _, "Data set is empty.")) // #2 - .InSequence(s1); - EXPECT_CALL(log, Log(WARNING, _, "User not found.")) // #3 - .InSequence(s2); -``` - -as soon as either #2 or #3 is matched, #1 will retire. If a warning `"File too -large."` is logged after this, it will be an error. - -Note that an expectation doesn't retire automatically when it's saturated. For -example, - -```cpp -using ::testing::_; -... - EXPECT_CALL(log, Log(WARNING, _, _)); // #1 - EXPECT_CALL(log, Log(WARNING, _, "File too large.")); // #2 -``` - -says that there will be exactly one warning with the message `"File too -large."`. If the second warning contains this message too, #2 will match again -and result in an upper-bound-violated error. - -If this is not what you want, you can ask an expectation to retire as soon as it -becomes saturated: - -```cpp -using ::testing::_; -... - EXPECT_CALL(log, Log(WARNING, _, _)); // #1 - EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #2 - .RetiresOnSaturation(); -``` - -Here #2 can be used only once, so if you have two warnings with the message -`"File too large."`, the first will match #2 and the second will match #1 - -there will be no error. - -## Using Actions - -### Returning References from Mock Methods - -If a mock function's return type is a reference, you need to use `ReturnRef()` -instead of `Return()` to return a result: - -```cpp -using ::testing::ReturnRef; - -class MockFoo : public Foo { - public: - MOCK_METHOD(Bar&, GetBar, (), (override)); -}; -... - MockFoo foo; - Bar bar; - EXPECT_CALL(foo, GetBar()) - .WillOnce(ReturnRef(bar)); -... -``` - -### Returning Live Values from Mock Methods - -The `Return(x)` action saves a copy of `x` when the action is created, and -always returns the same value whenever it's executed. Sometimes you may want to -instead return the *live* value of `x` (i.e. its value at the time when the -action is *executed*.). Use either `ReturnRef()` or `ReturnPointee()` for this -purpose. 
- -If the mock function's return type is a reference, you can do it using -`ReturnRef(x)`, as shown in the previous recipe ("Returning References from Mock -Methods"). However, gMock doesn't let you use `ReturnRef()` in a mock function -whose return type is not a reference, as doing that usually indicates a user -error. So, what shall you do? - -Though you may be tempted, DO NOT use `std::ref()`: - -```cpp -using testing::Return; - -class MockFoo : public Foo { - public: - MOCK_METHOD(int, GetValue, (), (override)); -}; -... - int x = 0; - MockFoo foo; - EXPECT_CALL(foo, GetValue()) - .WillRepeatedly(Return(std::ref(x))); // Wrong! - x = 42; - EXPECT_EQ(42, foo.GetValue()); -``` - -Unfortunately, it doesn't work here. The above code will fail with error: - -```text -Value of: foo.GetValue() - Actual: 0 -Expected: 42 -``` - -The reason is that `Return(*value*)` converts `value` to the actual return type -of the mock function at the time when the action is *created*, not when it is -*executed*. (This behavior was chosen for the action to be safe when `value` is -a proxy object that references some temporary objects.) As a result, -`std::ref(x)` is converted to an `int` value (instead of a `const int&`) when -the expectation is set, and `Return(std::ref(x))` will always return 0. - -`ReturnPointee(pointer)` was provided to solve this problem specifically. It -returns the value pointed to by `pointer` at the time the action is *executed*: - -```cpp -using testing::ReturnPointee; -... - int x = 0; - MockFoo foo; - EXPECT_CALL(foo, GetValue()) - .WillRepeatedly(ReturnPointee(&x)); // Note the & here. - x = 42; - EXPECT_EQ(42, foo.GetValue()); // This will succeed now. -``` - -### Combining Actions - -Want to do more than one thing when a function is called? That's fine. `DoAll()` -allows you to do a sequence of actions every time. Only the return value of the -last action in the sequence will be used. - -```cpp -using ::testing::_; -using ::testing::DoAll; - -class MockFoo : public Foo { - public: - MOCK_METHOD(bool, Bar, (int n), (override)); -}; -... - EXPECT_CALL(foo, Bar(_)) - .WillOnce(DoAll(action_1, - action_2, - ... - action_n)); -``` - -### Verifying Complex Arguments {#SaveArgVerify} - -If you want to verify that a method is called with a particular argument but the -match criteria is complex, it can be difficult to distinguish between -cardinality failures (calling the method the wrong number of times) and argument -match failures. Similarly, if you are matching multiple parameters, it may not -be easy to distinguishing which argument failed to match. For example: - -```cpp - // Not ideal: this could fail because of a problem with arg1 or arg2, or maybe - // just the method wasn't called. - EXPECT_CALL(foo, SendValues(_, ElementsAre(1, 4, 4, 7), EqualsProto( ... ))); -``` - -You can instead save the arguments and test them individually: - -```cpp - EXPECT_CALL(foo, SendValues) - .WillOnce(DoAll(SaveArg<1>(&actual_array), SaveArg<2>(&actual_proto))); - ... run the test - EXPECT_THAT(actual_array, ElementsAre(1, 4, 4, 7)); - EXPECT_THAT(actual_proto, EqualsProto( ... )); -``` - -### Mocking Side Effects {#MockingSideEffects} - -Sometimes a method exhibits its effect not via returning a value but via side -effects. For example, it may change some global state or modify an output -argument. To mock side effects, in general you can define your own action by -implementing `::testing::ActionInterface`. 
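
Often, though, a simple lambda action is enough to model a side effect, such as accumulating state across calls (a minimal sketch; `MockCounter` and `Increment` are hypothetical names):

```cpp
using ::testing::_;

class MockCounter {
 public:
  MOCK_METHOD(void, Increment, (int amount));
};
...
  MockCounter counter;
  int total = 0;
  EXPECT_CALL(counter, Increment(_))
      .WillRepeatedly([&total](int amount) { total += amount; });

  counter.Increment(2);
  counter.Increment(3);
  EXPECT_EQ(5, total);
```
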
- -If all you need to do is to change an output argument, the built-in -`SetArgPointee()` action is convenient: - -```cpp -using ::testing::_; -using ::testing::SetArgPointee; - -class MockMutator : public Mutator { - public: - MOCK_METHOD(void, Mutate, (bool mutate, int* value), (override)); - ... -} -... - MockMutator mutator; - EXPECT_CALL(mutator, Mutate(true, _)) - .WillOnce(SetArgPointee<1>(5)); -``` - -In this example, when `mutator.Mutate()` is called, we will assign 5 to the -`int` variable pointed to by argument #1 (0-based). - -`SetArgPointee()` conveniently makes an internal copy of the value you pass to -it, removing the need to keep the value in scope and alive. The implication -however is that the value must have a copy constructor and assignment operator. - -If the mock method also needs to return a value as well, you can chain -`SetArgPointee()` with `Return()` using `DoAll()`, remembering to put the -`Return()` statement last: - -```cpp -using ::testing::_; -using ::testing::DoAll; -using ::testing::Return; -using ::testing::SetArgPointee; - -class MockMutator : public Mutator { - public: - ... - MOCK_METHOD(bool, MutateInt, (int* value), (override)); -} -... - MockMutator mutator; - EXPECT_CALL(mutator, MutateInt(_)) - .WillOnce(DoAll(SetArgPointee<0>(5), - Return(true))); -``` - -Note, however, that if you use the `ReturnOKWith()` method, it will override the -values provided by `SetArgPointee()` in the response parameters of your function -call. - -If the output argument is an array, use the `SetArrayArgument(first, last)` -action instead. It copies the elements in source range `[first, last)` to the -array pointed to by the `N`-th (0-based) argument: - -```cpp -using ::testing::NotNull; -using ::testing::SetArrayArgument; - -class MockArrayMutator : public ArrayMutator { - public: - MOCK_METHOD(void, Mutate, (int* values, int num_values), (override)); - ... -} -... - MockArrayMutator mutator; - int values[5] = {1, 2, 3, 4, 5}; - EXPECT_CALL(mutator, Mutate(NotNull(), 5)) - .WillOnce(SetArrayArgument<0>(values, values + 5)); -``` - -This also works when the argument is an output iterator: - -```cpp -using ::testing::_; -using ::testing::SetArrayArgument; - -class MockRolodex : public Rolodex { - public: - MOCK_METHOD(void, GetNames, (std::back_insert_iterator>), - (override)); - ... -} -... - MockRolodex rolodex; - vector names = {"George", "John", "Thomas"}; - EXPECT_CALL(rolodex, GetNames(_)) - .WillOnce(SetArrayArgument<0>(names.begin(), names.end())); -``` - -### Changing a Mock Object's Behavior Based on the State - -If you expect a call to change the behavior of a mock object, you can use -`::testing::InSequence` to specify different behaviors before and after the -call: - -```cpp -using ::testing::InSequence; -using ::testing::Return; - -... - { - InSequence seq; - EXPECT_CALL(my_mock, IsDirty()) - .WillRepeatedly(Return(true)); - EXPECT_CALL(my_mock, Flush()); - EXPECT_CALL(my_mock, IsDirty()) - .WillRepeatedly(Return(false)); - } - my_mock.FlushIfDirty(); -``` - -This makes `my_mock.IsDirty()` return `true` before `my_mock.Flush()` is called -and return `false` afterwards. - -If the behavior change is more complex, you can store the effects in a variable -and make a mock method get its return value from that variable: - -```cpp -using ::testing::_; -using ::testing::SaveArg; -using ::testing::Return; - -ACTION_P(ReturnPointee, p) { return *p; } -... 
- int previous_value = 0; - EXPECT_CALL(my_mock, GetPrevValue) - .WillRepeatedly(ReturnPointee(&previous_value)); - EXPECT_CALL(my_mock, UpdateValue) - .WillRepeatedly(SaveArg<0>(&previous_value)); - my_mock.DoSomethingToUpdateValue(); -``` - -Here `my_mock.GetPrevValue()` will always return the argument of the last -`UpdateValue()` call. - -### Setting the Default Value for a Return Type {#DefaultValue} - -If a mock method's return type is a built-in C++ type or pointer, by default it -will return 0 when invoked. Also, in C++ 11 and above, a mock method whose -return type has a default constructor will return a default-constructed value by -default. You only need to specify an action if this default value doesn't work -for you. - -Sometimes, you may want to change this default value, or you may want to specify -a default value for types gMock doesn't know about. You can do this using the -`::testing::DefaultValue` class template: - -```cpp -using ::testing::DefaultValue; - -class MockFoo : public Foo { - public: - MOCK_METHOD(Bar, CalculateBar, (), (override)); -}; - - -... - Bar default_bar; - // Sets the default return value for type Bar. - DefaultValue::Set(default_bar); - - MockFoo foo; - - // We don't need to specify an action here, as the default - // return value works for us. - EXPECT_CALL(foo, CalculateBar()); - - foo.CalculateBar(); // This should return default_bar. - - // Unsets the default return value. - DefaultValue::Clear(); -``` - -Please note that changing the default value for a type can make your tests hard -to understand. We recommend you to use this feature judiciously. For example, -you may want to make sure the `Set()` and `Clear()` calls are right next to the -code that uses your mock. - -### Setting the Default Actions for a Mock Method - -You've learned how to change the default value of a given type. However, this -may be too coarse for your purpose: perhaps you have two mock methods with the -same return type and you want them to have different behaviors. The `ON_CALL()` -macro allows you to customize your mock's behavior at the method level: - -```cpp -using ::testing::_; -using ::testing::AnyNumber; -using ::testing::Gt; -using ::testing::Return; -... - ON_CALL(foo, Sign(_)) - .WillByDefault(Return(-1)); - ON_CALL(foo, Sign(0)) - .WillByDefault(Return(0)); - ON_CALL(foo, Sign(Gt(0))) - .WillByDefault(Return(1)); - - EXPECT_CALL(foo, Sign(_)) - .Times(AnyNumber()); - - foo.Sign(5); // This should return 1. - foo.Sign(-9); // This should return -1. - foo.Sign(0); // This should return 0. -``` - -As you may have guessed, when there are more than one `ON_CALL()` statements, -the newer ones in the order take precedence over the older ones. In other words, -the **last** one that matches the function arguments will be used. This matching -order allows you to set up the common behavior in a mock object's constructor or -the test fixture's set-up phase and specialize the mock's behavior later. - -Note that both `ON_CALL` and `EXPECT_CALL` have the same "later statements take -precedence" rule, but they don't interact. That is, `EXPECT_CALL`s have their -own precedence order distinct from the `ON_CALL` precedence order. - -### Using Functions/Methods/Functors/Lambdas as Actions {#FunctionsAsActions} - -If the built-in actions don't suit you, you can use an existing callable -(function, `std::function`, method, functor, lambda) as an action. 
- -```cpp -using ::testing::_; using ::testing::Invoke; - -class MockFoo : public Foo { - public: - MOCK_METHOD(int, Sum, (int x, int y), (override)); - MOCK_METHOD(bool, ComplexJob, (int x), (override)); -}; - -int CalculateSum(int x, int y) { return x + y; } -int Sum3(int x, int y, int z) { return x + y + z; } - -class Helper { - public: - bool ComplexJob(int x); -}; - -... - MockFoo foo; - Helper helper; - EXPECT_CALL(foo, Sum(_, _)) - .WillOnce(&CalculateSum) - .WillRepeatedly(Invoke(NewPermanentCallback(Sum3, 1))); - EXPECT_CALL(foo, ComplexJob(_)) - .WillOnce(Invoke(&helper, &Helper::ComplexJob)) - .WillOnce([] { return true; }) - .WillRepeatedly([](int x) { return x > 0; }); - - foo.Sum(5, 6); // Invokes CalculateSum(5, 6). - foo.Sum(2, 3); // Invokes Sum3(1, 2, 3). - foo.ComplexJob(10); // Invokes helper.ComplexJob(10). - foo.ComplexJob(-1); // Invokes the inline lambda. -``` - -The only requirement is that the type of the function, etc must be *compatible* -with the signature of the mock function, meaning that the latter's arguments (if -it takes any) can be implicitly converted to the corresponding arguments of the -former, and the former's return type can be implicitly converted to that of the -latter. So, you can invoke something whose type is *not* exactly the same as the -mock function, as long as it's safe to do so - nice, huh? - -Note that: - -* The action takes ownership of the callback and will delete it when the - action itself is destructed. -* If the type of a callback is derived from a base callback type `C`, you need - to implicitly cast it to `C` to resolve the overloading, e.g. - - ```cpp - using ::testing::Invoke; - ... - ResultCallback* is_ok = ...; - ... Invoke(is_ok) ...; // This works. - - BlockingClosure* done = new BlockingClosure; - ... Invoke(implicit_cast(done)) ...; // The cast is necessary. - ``` - -### Using Functions with Extra Info as Actions - -The function or functor you call using `Invoke()` must have the same number of -arguments as the mock function you use it for. Sometimes you may have a function -that takes more arguments, and you are willing to pass in the extra arguments -yourself to fill the gap. You can do this in gMock using callbacks with -pre-bound arguments. Here's an example: - -```cpp -using ::testing::Invoke; - -class MockFoo : public Foo { - public: - MOCK_METHOD(char, DoThis, (int n), (override)); -}; - -char SignOfSum(int x, int y) { - const int sum = x + y; - return (sum > 0) ? '+' : (sum < 0) ? '-' : '0'; -} - -TEST_F(FooTest, Test) { - MockFoo foo; - - EXPECT_CALL(foo, DoThis(2)) - .WillOnce(Invoke(NewPermanentCallback(SignOfSum, 5))); - EXPECT_EQ('+', foo.DoThis(2)); // Invokes SignOfSum(5, 2). -} -``` - -### Invoking a Function/Method/Functor/Lambda/Callback Without Arguments - -`Invoke()` passes the mock function's arguments to the function, etc being -invoked such that the callee has the full context of the call to work with. If -the invoked function is not interested in some or all of the arguments, it can -simply ignore them. - -Yet, a common pattern is that a test author wants to invoke a function without -the arguments of the mock function. She could do that using a wrapper function -that throws away the arguments before invoking an underlining nullary function. -Needless to say, this can be tedious and obscures the intent of the test. - -There are two solutions to this problem. First, you can pass any callable of -zero args as an action. 
Alternatively, use `InvokeWithoutArgs()`, which is like -`Invoke()` except that it doesn't pass the mock function's arguments to the -callee. Here's an example of each: - -```cpp -using ::testing::_; -using ::testing::InvokeWithoutArgs; - -class MockFoo : public Foo { - public: - MOCK_METHOD(bool, ComplexJob, (int n), (override)); -}; - -bool Job1() { ... } -bool Job2(int n, char c) { ... } - -... - MockFoo foo; - EXPECT_CALL(foo, ComplexJob(_)) - .WillOnce([] { Job1(); }); - .WillOnce(InvokeWithoutArgs(NewPermanentCallback(Job2, 5, 'a'))); - - foo.ComplexJob(10); // Invokes Job1(). - foo.ComplexJob(20); // Invokes Job2(5, 'a'). -``` - -Note that: - -* The action takes ownership of the callback and will delete it when the - action itself is destructed. -* If the type of a callback is derived from a base callback type `C`, you need - to implicitly cast it to `C` to resolve the overloading, e.g. - - ```cpp - using ::testing::InvokeWithoutArgs; - ... - ResultCallback* is_ok = ...; - ... InvokeWithoutArgs(is_ok) ...; // This works. - - BlockingClosure* done = ...; - ... InvokeWithoutArgs(implicit_cast(done)) ...; - // The cast is necessary. - ``` - -### Invoking an Argument of the Mock Function - -Sometimes a mock function will receive a function pointer, a functor (in other -words, a "callable") as an argument, e.g. - -```cpp -class MockFoo : public Foo { - public: - MOCK_METHOD(bool, DoThis, (int n, (ResultCallback1* callback)), - (override)); -}; -``` - -and you may want to invoke this callable argument: - -```cpp -using ::testing::_; -... - MockFoo foo; - EXPECT_CALL(foo, DoThis(_, _)) - .WillOnce(...); - // Will execute callback->Run(5), where callback is the - // second argument DoThis() receives. -``` - -{: .callout .note} -NOTE: The section below is legacy documentation from before C++ had lambdas: - -Arghh, you need to refer to a mock function argument but C++ has no lambda -(yet), so you have to define your own action. :-( Or do you really? - -Well, gMock has an action to solve *exactly* this problem: - -```cpp -InvokeArgument(arg_1, arg_2, ..., arg_m) -``` - -will invoke the `N`-th (0-based) argument the mock function receives, with -`arg_1`, `arg_2`, ..., and `arg_m`. No matter if the argument is a function -pointer, a functor, or a callback. gMock handles them all. - -With that, you could write: - -```cpp -using ::testing::_; -using ::testing::InvokeArgument; -... - EXPECT_CALL(foo, DoThis(_, _)) - .WillOnce(InvokeArgument<1>(5)); - // Will execute callback->Run(5), where callback is the - // second argument DoThis() receives. -``` - -What if the callable takes an argument by reference? No problem - just wrap it -inside `std::ref()`: - -```cpp - ... - MOCK_METHOD(bool, Bar, - ((ResultCallback2* callback)), - (override)); - ... - using ::testing::_; - using ::testing::InvokeArgument; - ... - MockFoo foo; - Helper helper; - ... - EXPECT_CALL(foo, Bar(_)) - .WillOnce(InvokeArgument<0>(5, std::ref(helper))); - // std::ref(helper) guarantees that a reference to helper, not a copy of - // it, will be passed to the callback. -``` - -What if the callable takes an argument by reference and we do **not** wrap the -argument in `std::ref()`? Then `InvokeArgument()` will *make a copy* of the -argument, and pass a *reference to the copy*, instead of a reference to the -original value, to the callable. This is especially handy when the argument is a -temporary value: - -```cpp - ... - MOCK_METHOD(bool, DoThat, (bool (*f)(const double& x, const string& s)), - (override)); - ... 
- using ::testing::_; - using ::testing::InvokeArgument; - ... - MockFoo foo; - ... - EXPECT_CALL(foo, DoThat(_)) - .WillOnce(InvokeArgument<0>(5.0, string("Hi"))); - // Will execute (*f)(5.0, string("Hi")), where f is the function pointer - // DoThat() receives. Note that the values 5.0 and string("Hi") are - // temporary and dead once the EXPECT_CALL() statement finishes. Yet - // it's fine to perform this action later, since a copy of the values - // are kept inside the InvokeArgument action. -``` - -### Ignoring an Action's Result - -Sometimes you have an action that returns *something*, but you need an action -that returns `void` (perhaps you want to use it in a mock function that returns -`void`, or perhaps it needs to be used in `DoAll()` and it's not the last in the -list). `IgnoreResult()` lets you do that. For example: - -```cpp -using ::testing::_; -using ::testing::DoAll; -using ::testing::IgnoreResult; -using ::testing::Return; - -int Process(const MyData& data); -string DoSomething(); - -class MockFoo : public Foo { - public: - MOCK_METHOD(void, Abc, (const MyData& data), (override)); - MOCK_METHOD(bool, Xyz, (), (override)); -}; - - ... - MockFoo foo; - EXPECT_CALL(foo, Abc(_)) - // .WillOnce(Invoke(Process)); - // The above line won't compile as Process() returns int but Abc() needs - // to return void. - .WillOnce(IgnoreResult(Process)); - EXPECT_CALL(foo, Xyz()) - .WillOnce(DoAll(IgnoreResult(DoSomething), - // Ignores the string DoSomething() returns. - Return(true))); -``` - -Note that you **cannot** use `IgnoreResult()` on an action that already returns -`void`. Doing so will lead to ugly compiler errors. - -### Selecting an Action's Arguments {#SelectingArgs} - -Say you have a mock function `Foo()` that takes seven arguments, and you have a -custom action that you want to invoke when `Foo()` is called. Trouble is, the -custom action only wants three arguments: - -```cpp -using ::testing::_; -using ::testing::Invoke; -... - MOCK_METHOD(bool, Foo, - (bool visible, const string& name, int x, int y, - (const map>), double& weight, double min_weight, - double max_wight)); -... -bool IsVisibleInQuadrant1(bool visible, int x, int y) { - return visible && x >= 0 && y >= 0; -} -... - EXPECT_CALL(mock, Foo) - .WillOnce(Invoke(IsVisibleInQuadrant1)); // Uh, won't compile. :-( -``` - -To please the compiler God, you need to define an "adaptor" that has the same -signature as `Foo()` and calls the custom action with the right arguments: - -```cpp -using ::testing::_; -using ::testing::Invoke; -... -bool MyIsVisibleInQuadrant1(bool visible, const string& name, int x, int y, - const map, double>& weight, - double min_weight, double max_wight) { - return IsVisibleInQuadrant1(visible, x, y); -} -... - EXPECT_CALL(mock, Foo) - .WillOnce(Invoke(MyIsVisibleInQuadrant1)); // Now it works. -``` - -But isn't this awkward? - -gMock provides a generic *action adaptor*, so you can spend your time minding -more important business than writing your own adaptors. Here's the syntax: - -```cpp -WithArgs(action) -``` - -creates an action that passes the arguments of the mock function at the given -indices (0-based) to the inner `action` and performs it. Using `WithArgs`, our -original example can be written as: - -```cpp -using ::testing::_; -using ::testing::Invoke; -using ::testing::WithArgs; -... - EXPECT_CALL(mock, Foo) - .WillOnce(WithArgs<0, 2, 3>(Invoke(IsVisibleInQuadrant1))); // No need to define your own adaptor. 
-``` - -For better readability, gMock also gives you: - -* `WithoutArgs(action)` when the inner `action` takes *no* argument, and -* `WithArg(action)` (no `s` after `Arg`) when the inner `action` takes - *one* argument. - -As you may have realized, `InvokeWithoutArgs(...)` is just syntactic sugar for -`WithoutArgs(Invoke(...))`. - -Here are more tips: - -* The inner action used in `WithArgs` and friends does not have to be - `Invoke()` -- it can be anything. -* You can repeat an argument in the argument list if necessary, e.g. - `WithArgs<2, 3, 3, 5>(...)`. -* You can change the order of the arguments, e.g. `WithArgs<3, 2, 1>(...)`. -* The types of the selected arguments do *not* have to match the signature of - the inner action exactly. It works as long as they can be implicitly - converted to the corresponding arguments of the inner action. For example, - if the 4-th argument of the mock function is an `int` and `my_action` takes - a `double`, `WithArg<4>(my_action)` will work. - -### Ignoring Arguments in Action Functions - -The [selecting-an-action's-arguments](#SelectingArgs) recipe showed us one way -to make a mock function and an action with incompatible argument lists fit -together. The downside is that wrapping the action in `WithArgs<...>()` can get -tedious for people writing the tests. - -If you are defining a function (or method, functor, lambda, callback) to be used -with `Invoke*()`, and you are not interested in some of its arguments, an -alternative to `WithArgs` is to declare the uninteresting arguments as `Unused`. -This makes the definition less cluttered and less fragile in case the types of -the uninteresting arguments change. It could also increase the chance the action -function can be reused. For example, given - -```cpp - public: - MOCK_METHOD(double, Foo, double(const string& label, double x, double y), - (override)); - MOCK_METHOD(double, Bar, (int index, double x, double y), (override)); -``` - -instead of - -```cpp -using ::testing::_; -using ::testing::Invoke; - -double DistanceToOriginWithLabel(const string& label, double x, double y) { - return sqrt(x*x + y*y); -} -double DistanceToOriginWithIndex(int index, double x, double y) { - return sqrt(x*x + y*y); -} -... - EXPECT_CALL(mock, Foo("abc", _, _)) - .WillOnce(Invoke(DistanceToOriginWithLabel)); - EXPECT_CALL(mock, Bar(5, _, _)) - .WillOnce(Invoke(DistanceToOriginWithIndex)); -``` - -you could write - -```cpp -using ::testing::_; -using ::testing::Invoke; -using ::testing::Unused; - -double DistanceToOrigin(Unused, double x, double y) { - return sqrt(x*x + y*y); -} -... - EXPECT_CALL(mock, Foo("abc", _, _)) - .WillOnce(Invoke(DistanceToOrigin)); - EXPECT_CALL(mock, Bar(5, _, _)) - .WillOnce(Invoke(DistanceToOrigin)); -``` - -### Sharing Actions - -Just like matchers, a gMock action object consists of a pointer to a ref-counted -implementation object. Therefore copying actions is also allowed and very -efficient. When the last action that references the implementation object dies, -the implementation object will be deleted. - -If you have some complex action that you want to use again and again, you may -not have to build it from scratch every time. If the action doesn't have an -internal state (i.e. if it always does the same thing no matter how many times -it has been called), you can assign it to an action variable and use that -variable repeatedly. For example: - -```cpp -using ::testing::Action; -using ::testing::DoAll; -using ::testing::Return; -using ::testing::SetArgPointee; -... 
- Action set_flag = DoAll(SetArgPointee<0>(5), - Return(true)); - ... use set_flag in .WillOnce() and .WillRepeatedly() ... -``` - -However, if the action has its own state, you may be surprised if you share the -action object. Suppose you have an action factory `IncrementCounter(init)` which -creates an action that increments and returns a counter whose initial value is -`init`, using two actions created from the same expression and using a shared -action will exhibit different behaviors. Example: - -```cpp - EXPECT_CALL(foo, DoThis()) - .WillRepeatedly(IncrementCounter(0)); - EXPECT_CALL(foo, DoThat()) - .WillRepeatedly(IncrementCounter(0)); - foo.DoThis(); // Returns 1. - foo.DoThis(); // Returns 2. - foo.DoThat(); // Returns 1 - Blah() uses a different - // counter than Bar()'s. -``` - -versus - -```cpp -using ::testing::Action; -... - Action increment = IncrementCounter(0); - EXPECT_CALL(foo, DoThis()) - .WillRepeatedly(increment); - EXPECT_CALL(foo, DoThat()) - .WillRepeatedly(increment); - foo.DoThis(); // Returns 1. - foo.DoThis(); // Returns 2. - foo.DoThat(); // Returns 3 - the counter is shared. -``` - -### Testing Asynchronous Behavior - -One oft-encountered problem with gMock is that it can be hard to test -asynchronous behavior. Suppose you had a `EventQueue` class that you wanted to -test, and you created a separate `EventDispatcher` interface so that you could -easily mock it out. However, the implementation of the class fired all the -events on a background thread, which made test timings difficult. You could just -insert `sleep()` statements and hope for the best, but that makes your test -behavior nondeterministic. A better way is to use gMock actions and -`Notification` objects to force your asynchronous test to behave synchronously. - -```cpp -class MockEventDispatcher : public EventDispatcher { - MOCK_METHOD(bool, DispatchEvent, (int32), (override)); -}; - -TEST(EventQueueTest, EnqueueEventTest) { - MockEventDispatcher mock_event_dispatcher; - EventQueue event_queue(&mock_event_dispatcher); - - const int32 kEventId = 321; - absl::Notification done; - EXPECT_CALL(mock_event_dispatcher, DispatchEvent(kEventId)) - .WillOnce([&done] { done.Notify(); }); - - event_queue.EnqueueEvent(kEventId); - done.WaitForNotification(); -} -``` - -In the example above, we set our normal gMock expectations, but then add an -additional action to notify the `Notification` object. Now we can just call -`Notification::WaitForNotification()` in the main thread to wait for the -asynchronous call to finish. After that, our test suite is complete and we can -safely exit. - -{: .callout .note} -Note: this example has a downside: namely, if the expectation is not satisfied, -our test will run forever. It will eventually time-out and fail, but it will -take longer and be slightly harder to debug. To alleviate this problem, you can -use `WaitForNotificationWithTimeout(ms)` instead of `WaitForNotification()`. - -## Misc Recipes on Using gMock - -### Mocking Methods That Use Move-Only Types - -C++11 introduced *move-only types*. A move-only-typed value can be moved from -one object to another, but cannot be copied. `std::unique_ptr` is probably -the most commonly used move-only type. - -Mocking a method that takes and/or returns move-only types presents some -challenges, but nothing insurmountable. This recipe shows you how you can do it. 

Note that the support for move-only method arguments was only introduced to
gMock in April 2017; in older code, you may find more complex
[workarounds](#LegacyMoveOnly) for lack of this feature.

Let’s say we are working on a fictional project that lets one post and share
snippets called “buzzes”. Your code uses these types:

```cpp
enum class AccessLevel { kInternal, kPublic };

class Buzz {
 public:
  explicit Buzz(AccessLevel access) { ... }
  ...
};

class Buzzer {
 public:
  virtual ~Buzzer() {}
  virtual std::unique_ptr<Buzz> MakeBuzz(StringPiece text) = 0;
  virtual bool ShareBuzz(std::unique_ptr<Buzz> buzz, int64_t timestamp) = 0;
  ...
};
```

A `Buzz` object represents a snippet being posted. A class that implements the
`Buzzer` interface is capable of creating and sharing `Buzz`es. Methods in
`Buzzer` may return a `unique_ptr<Buzz>` or take a `unique_ptr<Buzz>`. Now we
need to mock `Buzzer` in our tests.

To mock a method that accepts or returns move-only types, you just use the
familiar `MOCK_METHOD` syntax as usual:

```cpp
class MockBuzzer : public Buzzer {
 public:
  MOCK_METHOD(std::unique_ptr<Buzz>, MakeBuzz, (StringPiece text), (override));
  MOCK_METHOD(bool, ShareBuzz, (std::unique_ptr<Buzz> buzz, int64_t timestamp),
              (override));
};
```

Now that we have the mock class defined, we can use it in tests. In the
following code examples, we assume that we have defined a `MockBuzzer` object
named `mock_buzzer_`:

```cpp
  MockBuzzer mock_buzzer_;
```

First let’s see how we can set expectations on the `MakeBuzz()` method, which
returns a `unique_ptr<Buzz>`.

As usual, if you set an expectation without an action (i.e. the `.WillOnce()` or
`.WillRepeatedly()` clause), when that expectation fires, the default action for
that method will be taken. Since `unique_ptr<>` has a default constructor that
returns a null `unique_ptr`, that’s what you’ll get if you don’t specify an
action:

```cpp
  // Use the default action.
  EXPECT_CALL(mock_buzzer_, MakeBuzz("hello"));

  // Triggers the previous EXPECT_CALL.
  EXPECT_EQ(nullptr, mock_buzzer_.MakeBuzz("hello"));
```

If you are not happy with the default action, you can tweak it as usual; see
[Setting Default Actions](#OnCall).

If you just need to return a pre-defined move-only value, you can use the
`Return(ByMove(...))` action:

```cpp
  // When this fires, the unique_ptr<> specified by ByMove(...) will
  // be returned.
  EXPECT_CALL(mock_buzzer_, MakeBuzz("world"))
      .WillOnce(Return(ByMove(std::make_unique<Buzz>(AccessLevel::kInternal))));

  EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("world"));
```

Note that `ByMove()` is essential here - if you drop it, the code won’t compile.

Quiz time! What do you think will happen if a `Return(ByMove(...))` action is
performed more than once (e.g. you write `...
.WillRepeatedly(Return(ByMove(...)));`)? Come think of it, after the first time
the action runs, the source value will be consumed (since it’s a move-only
value), so the next time around, there’s no value to move from -- you’ll get a
run-time error that `Return(ByMove(...))` can only be run once.
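
To make the quiz answer concrete, here is a minimal sketch of that pitfall,
reusing the `mock_buzzer_` object from the examples above:

```cpp
  // Compiles, but only the first matching call is safe: the stored Buzz can
  // be moved out exactly once.
  EXPECT_CALL(mock_buzzer_, MakeBuzz("world"))
      .WillRepeatedly(
          Return(ByMove(std::make_unique<Buzz>(AccessLevel::kInternal))));

  mock_buzzer_.MakeBuzz("world");  // OK: moves the pre-defined Buzz out.
  mock_buzzer_.MakeBuzz("world");  // Run-time error: the value has already
                                   // been consumed.
```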
- -If you need your mock method to do more than just moving a pre-defined value, -remember that you can always use a lambda or a callable object, which can do -pretty much anything you want: - -```cpp - EXPECT_CALL(mock_buzzer_, MakeBuzz("x")) - .WillRepeatedly([](StringPiece text) { - return std::make_unique(AccessLevel::kInternal); - }); - - EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x")); - EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x")); -``` - -Every time this `EXPECT_CALL` fires, a new `unique_ptr` will be created -and returned. You cannot do this with `Return(ByMove(...))`. - -That covers returning move-only values; but how do we work with methods -accepting move-only arguments? The answer is that they work normally, although -some actions will not compile when any of method's arguments are move-only. You -can always use `Return`, or a [lambda or functor](#FunctionsAsActions): - -```cpp - using ::testing::Unused; - - EXPECT_CALL(mock_buzzer_, ShareBuzz(NotNull(), _)).WillOnce(Return(true)); - EXPECT_TRUE(mock_buzzer_.ShareBuzz(std::make_unique(AccessLevel::kInternal)), - 0); - - EXPECT_CALL(mock_buzzer_, ShareBuzz(_, _)).WillOnce( - [](std::unique_ptr buzz, Unused) { return buzz != nullptr; }); - EXPECT_FALSE(mock_buzzer_.ShareBuzz(nullptr, 0)); -``` - -Many built-in actions (`WithArgs`, `WithoutArgs`,`DeleteArg`, `SaveArg`, ...) -could in principle support move-only arguments, but the support for this is not -implemented yet. If this is blocking you, please file a bug. - -A few actions (e.g. `DoAll`) copy their arguments internally, so they can never -work with non-copyable objects; you'll have to use functors instead. - -#### Legacy workarounds for move-only types {#LegacyMoveOnly} - -Support for move-only function arguments was only introduced to gMock in April -of 2017. In older code, you may encounter the following workaround for the lack -of this feature (it is no longer necessary - we're including it just for -reference): - -```cpp -class MockBuzzer : public Buzzer { - public: - MOCK_METHOD(bool, DoShareBuzz, (Buzz* buzz, Time timestamp)); - bool ShareBuzz(std::unique_ptr buzz, Time timestamp) override { - return DoShareBuzz(buzz.get(), timestamp); - } -}; -``` - -The trick is to delegate the `ShareBuzz()` method to a mock method (let’s call -it `DoShareBuzz()`) that does not take move-only parameters. Then, instead of -setting expectations on `ShareBuzz()`, you set them on the `DoShareBuzz()` mock -method: - -```cpp - MockBuzzer mock_buzzer_; - EXPECT_CALL(mock_buzzer_, DoShareBuzz(NotNull(), _)); - - // When one calls ShareBuzz() on the MockBuzzer like this, the call is - // forwarded to DoShareBuzz(), which is mocked. Therefore this statement - // will trigger the above EXPECT_CALL. - mock_buzzer_.ShareBuzz(std::make_unique(AccessLevel::kInternal), 0); -``` - -### Making the Compilation Faster - -Believe it or not, the *vast majority* of the time spent on compiling a mock -class is in generating its constructor and destructor, as they perform -non-trivial tasks (e.g. verification of the expectations). What's more, mock -methods with different signatures have different types and thus their -constructors/destructors need to be generated by the compiler separately. As a -result, if you mock many different types of methods, compiling your mock class -can get really slow. - -If you are experiencing slow compilation, you can move the definition of your -mock class' constructor and destructor out of the class body and into a `.cc` -file. 
This way, even if you `#include` your mock class in N files, the compiler -only needs to generate its constructor and destructor once, resulting in a much -faster compilation. - -Let's illustrate the idea using an example. Here's the definition of a mock -class before applying this recipe: - -```cpp -// File mock_foo.h. -... -class MockFoo : public Foo { - public: - // Since we don't declare the constructor or the destructor, - // the compiler will generate them in every translation unit - // where this mock class is used. - - MOCK_METHOD(int, DoThis, (), (override)); - MOCK_METHOD(bool, DoThat, (const char* str), (override)); - ... more mock methods ... -}; -``` - -After the change, it would look like: - -```cpp -// File mock_foo.h. -... -class MockFoo : public Foo { - public: - // The constructor and destructor are declared, but not defined, here. - MockFoo(); - virtual ~MockFoo(); - - MOCK_METHOD(int, DoThis, (), (override)); - MOCK_METHOD(bool, DoThat, (const char* str), (override)); - ... more mock methods ... -}; -``` - -and - -```cpp -// File mock_foo.cc. -#include "path/to/mock_foo.h" - -// The definitions may appear trivial, but the functions actually do a -// lot of things through the constructors/destructors of the member -// variables used to implement the mock methods. -MockFoo::MockFoo() {} -MockFoo::~MockFoo() {} -``` - -### Forcing a Verification - -When it's being destroyed, your friendly mock object will automatically verify -that all expectations on it have been satisfied, and will generate googletest -failures if not. This is convenient as it leaves you with one less thing to -worry about. That is, unless you are not sure if your mock object will be -destroyed. - -How could it be that your mock object won't eventually be destroyed? Well, it -might be created on the heap and owned by the code you are testing. Suppose -there's a bug in that code and it doesn't delete the mock object properly - you -could end up with a passing test when there's actually a bug. - -Using a heap checker is a good idea and can alleviate the concern, but its -implementation is not 100% reliable. So, sometimes you do want to *force* gMock -to verify a mock object before it is (hopefully) destructed. You can do this -with `Mock::VerifyAndClearExpectations(&mock_object)`: - -```cpp -TEST(MyServerTest, ProcessesRequest) { - using ::testing::Mock; - - MockFoo* const foo = new MockFoo; - EXPECT_CALL(*foo, ...)...; - // ... other expectations ... - - // server now owns foo. - MyServer server(foo); - server.ProcessRequest(...); - - // In case that server's destructor will forget to delete foo, - // this will verify the expectations anyway. - Mock::VerifyAndClearExpectations(foo); -} // server is destroyed when it goes out of scope here. -``` - -{: .callout .tip} -**Tip:** The `Mock::VerifyAndClearExpectations()` function returns a `bool` to -indicate whether the verification was successful (`true` for yes), so you can -wrap that function call inside a `ASSERT_TRUE()` if there is no point going -further when the verification has failed. - -Do not set new expectations after verifying and clearing a mock after its use. -Setting expectations after code that exercises the mock has undefined behavior. -See [Using Mocks in Tests](gmock_for_dummies.md#using-mocks-in-tests) for more -information. 
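
Per the tip above, the forced verification can also serve as an early exit
point: since `Mock::VerifyAndClearExpectations()` returns `false` when any
expectation on the mock has been violated, wrapping the call in `ASSERT_TRUE()`
stops the test body right there. A minimal sketch, reusing the heap-allocated
`foo` mock from the example above:

```cpp
  // Abort the rest of the test body if verification fails; later statements
  // would otherwise exercise a mock that is already known to be broken.
  ASSERT_TRUE(Mock::VerifyAndClearExpectations(foo));
```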
- -### Using Checkpoints {#UsingCheckPoints} - -Sometimes you might want to test a mock object's behavior in phases whose sizes -are each manageable, or you might want to set more detailed expectations about -which API calls invoke which mock functions. - -A technique you can use is to put the expectations in a sequence and insert -calls to a dummy "checkpoint" function at specific places. Then you can verify -that the mock function calls do happen at the right time. For example, if you -are exercising the code: - -```cpp - Foo(1); - Foo(2); - Foo(3); -``` - -and want to verify that `Foo(1)` and `Foo(3)` both invoke `mock.Bar("a")`, but -`Foo(2)` doesn't invoke anything, you can write: - -```cpp -using ::testing::MockFunction; - -TEST(FooTest, InvokesBarCorrectly) { - MyMock mock; - // Class MockFunction has exactly one mock method. It is named - // Call() and has type F. - MockFunction check; - { - InSequence s; - - EXPECT_CALL(mock, Bar("a")); - EXPECT_CALL(check, Call("1")); - EXPECT_CALL(check, Call("2")); - EXPECT_CALL(mock, Bar("a")); - } - Foo(1); - check.Call("1"); - Foo(2); - check.Call("2"); - Foo(3); -} -``` - -The expectation spec says that the first `Bar("a")` call must happen before -checkpoint "1", the second `Bar("a")` call must happen after checkpoint "2", and -nothing should happen between the two checkpoints. The explicit checkpoints make -it clear which `Bar("a")` is called by which call to `Foo()`. - -### Mocking Destructors - -Sometimes you want to make sure a mock object is destructed at the right time, -e.g. after `bar->A()` is called but before `bar->B()` is called. We already know -that you can specify constraints on the [order](#OrderedCalls) of mock function -calls, so all we need to do is to mock the destructor of the mock function. - -This sounds simple, except for one problem: a destructor is a special function -with special syntax and special semantics, and the `MOCK_METHOD` macro doesn't -work for it: - -```cpp -MOCK_METHOD(void, ~MockFoo, ()); // Won't compile! -``` - -The good news is that you can use a simple pattern to achieve the same effect. -First, add a mock function `Die()` to your mock class and call it in the -destructor, like this: - -```cpp -class MockFoo : public Foo { - ... - // Add the following two lines to the mock class. - MOCK_METHOD(void, Die, ()); - ~MockFoo() override { Die(); } -}; -``` - -(If the name `Die()` clashes with an existing symbol, choose another name.) Now, -we have translated the problem of testing when a `MockFoo` object dies to -testing when its `Die()` method is called: - -```cpp - MockFoo* foo = new MockFoo; - MockBar* bar = new MockBar; - ... - { - InSequence s; - - // Expects *foo to die after bar->A() and before bar->B(). - EXPECT_CALL(*bar, A()); - EXPECT_CALL(*foo, Die()); - EXPECT_CALL(*bar, B()); - } -``` - -And that's that. - -### Using gMock and Threads {#UsingThreads} - -In a **unit** test, it's best if you could isolate and test a piece of code in a -single-threaded context. That avoids race conditions and dead locks, and makes -debugging your test much easier. - -Yet most programs are multi-threaded, and sometimes to test something we need to -pound on it from more than one thread. gMock works for this purpose too. - -Remember the steps for using a mock: - -1. Create a mock object `foo`. -2. Set its default actions and expectations using `ON_CALL()` and - `EXPECT_CALL()`. -3. The code under test calls methods of `foo`. -4. Optionally, verify and reset the mock. -5. 
Destroy the mock yourself, or let the code under test destroy it. The - destructor will automatically verify it. - -If you follow the following simple rules, your mocks and threads can live -happily together: - -* Execute your *test code* (as opposed to the code being tested) in *one* - thread. This makes your test easy to follow. -* Obviously, you can do step #1 without locking. -* When doing step #2 and #5, make sure no other thread is accessing `foo`. - Obvious too, huh? -* #3 and #4 can be done either in one thread or in multiple threads - anyway - you want. gMock takes care of the locking, so you don't have to do any - - unless required by your test logic. - -If you violate the rules (for example, if you set expectations on a mock while -another thread is calling its methods), you get undefined behavior. That's not -fun, so don't do it. - -gMock guarantees that the action for a mock function is done in the same thread -that called the mock function. For example, in - -```cpp - EXPECT_CALL(mock, Foo(1)) - .WillOnce(action1); - EXPECT_CALL(mock, Foo(2)) - .WillOnce(action2); -``` - -if `Foo(1)` is called in thread 1 and `Foo(2)` is called in thread 2, gMock will -execute `action1` in thread 1 and `action2` in thread 2. - -gMock does *not* impose a sequence on actions performed in different threads -(doing so may create deadlocks as the actions may need to cooperate). This means -that the execution of `action1` and `action2` in the above example *may* -interleave. If this is a problem, you should add proper synchronization logic to -`action1` and `action2` to make the test thread-safe. - -Also, remember that `DefaultValue` is a global resource that potentially -affects *all* living mock objects in your program. Naturally, you won't want to -mess with it from multiple threads or when there still are mocks in action. - -### Controlling How Much Information gMock Prints - -When gMock sees something that has the potential of being an error (e.g. a mock -function with no expectation is called, a.k.a. an uninteresting call, which is -allowed but perhaps you forgot to explicitly ban the call), it prints some -warning messages, including the arguments of the function, the return value, and -the stack trace. Hopefully this will remind you to take a look and see if there -is indeed a problem. - -Sometimes you are confident that your tests are correct and may not appreciate -such friendly messages. Some other times, you are debugging your tests or -learning about the behavior of the code you are testing, and wish you could -observe every mock call that happens (including argument values, the return -value, and the stack trace). Clearly, one size doesn't fit all. - -You can control how much gMock tells you using the `--gmock_verbose=LEVEL` -command-line flag, where `LEVEL` is a string with three possible values: - -* `info`: gMock will print all informational messages, warnings, and errors - (most verbose). At this setting, gMock will also log any calls to the - `ON_CALL/EXPECT_CALL` macros. It will include a stack trace in - "uninteresting call" warnings. -* `warning`: gMock will print both warnings and errors (less verbose); it will - omit the stack traces in "uninteresting call" warnings. This is the default. -* `error`: gMock will print errors only (least verbose). 
- -Alternatively, you can adjust the value of that flag from within your tests like -so: - -```cpp - ::testing::FLAGS_gmock_verbose = "error"; -``` - -If you find gMock printing too many stack frames with its informational or -warning messages, remember that you can control their amount with the -`--gtest_stack_trace_depth=max_depth` flag. - -Now, judiciously use the right flag to enable gMock serve you better! - -### Gaining Super Vision into Mock Calls - -You have a test using gMock. It fails: gMock tells you some expectations aren't -satisfied. However, you aren't sure why: Is there a typo somewhere in the -matchers? Did you mess up the order of the `EXPECT_CALL`s? Or is the code under -test doing something wrong? How can you find out the cause? - -Won't it be nice if you have X-ray vision and can actually see the trace of all -`EXPECT_CALL`s and mock method calls as they are made? For each call, would you -like to see its actual argument values and which `EXPECT_CALL` gMock thinks it -matches? If you still need some help to figure out who made these calls, how -about being able to see the complete stack trace at each mock call? - -You can unlock this power by running your test with the `--gmock_verbose=info` -flag. For example, given the test program: - -```cpp -#include "gmock/gmock.h" - -using testing::_; -using testing::HasSubstr; -using testing::Return; - -class MockFoo { - public: - MOCK_METHOD(void, F, (const string& x, const string& y)); -}; - -TEST(Foo, Bar) { - MockFoo mock; - EXPECT_CALL(mock, F(_, _)).WillRepeatedly(Return()); - EXPECT_CALL(mock, F("a", "b")); - EXPECT_CALL(mock, F("c", HasSubstr("d"))); - - mock.F("a", "good"); - mock.F("a", "b"); -} -``` - -if you run it with `--gmock_verbose=info`, you will see this output: - -```shell -[ RUN ] Foo.Bar - -foo_test.cc:14: EXPECT_CALL(mock, F(_, _)) invoked -Stack trace: ... - -foo_test.cc:15: EXPECT_CALL(mock, F("a", "b")) invoked -Stack trace: ... - -foo_test.cc:16: EXPECT_CALL(mock, F("c", HasSubstr("d"))) invoked -Stack trace: ... - -foo_test.cc:14: Mock function call matches EXPECT_CALL(mock, F(_, _))... - Function call: F(@0x7fff7c8dad40"a",@0x7fff7c8dad10"good") -Stack trace: ... - -foo_test.cc:15: Mock function call matches EXPECT_CALL(mock, F("a", "b"))... - Function call: F(@0x7fff7c8dada0"a",@0x7fff7c8dad70"b") -Stack trace: ... - -foo_test.cc:16: Failure -Actual function call count doesn't match EXPECT_CALL(mock, F("c", HasSubstr("d")))... - Expected: to be called once - Actual: never called - unsatisfied and active -[ FAILED ] Foo.Bar -``` - -Suppose the bug is that the `"c"` in the third `EXPECT_CALL` is a typo and -should actually be `"a"`. With the above message, you should see that the actual -`F("a", "good")` call is matched by the first `EXPECT_CALL`, not the third as -you thought. From that it should be obvious that the third `EXPECT_CALL` is -written wrong. Case solved. - -If you are interested in the mock call trace but not the stack traces, you can -combine `--gmock_verbose=info` with `--gtest_stack_trace_depth=0` on the test -command line. - -### Running Tests in Emacs - -If you build and run your tests in Emacs using the `M-x google-compile` command -(as many googletest users do), the source file locations of gMock and googletest -errors will be highlighted. Just press `` on one of them and you'll be -taken to the offending line. Or, you can just type `C-x`` to jump to the next -error. 
- -To make it even easier, you can add the following lines to your `~/.emacs` file: - -```text -(global-set-key "\M-m" 'google-compile) ; m is for make -(global-set-key [M-down] 'next-error) -(global-set-key [M-up] '(lambda () (interactive) (next-error -1))) -``` - -Then you can type `M-m` to start a build (if you want to run the test as well, -just make sure `foo_test.run` or `runtests` is in the build command you supply -after typing `M-m`), or `M-up`/`M-down` to move back and forth between errors. - -## Extending gMock - -### Writing New Matchers Quickly {#NewMatchers} - -{: .callout .warning} -WARNING: gMock does not guarantee when or how many times a matcher will be -invoked. Therefore, all matchers must be functionally pure. See -[this section](#PureMatchers) for more details. - -The `MATCHER*` family of macros can be used to define custom matchers easily. -The syntax: - -```cpp -MATCHER(name, description_string_expression) { statements; } -``` - -will define a matcher with the given name that executes the statements, which -must return a `bool` to indicate if the match succeeds. Inside the statements, -you can refer to the value being matched by `arg`, and refer to its type by -`arg_type`. - -The *description string* is a `string`-typed expression that documents what the -matcher does, and is used to generate the failure message when the match fails. -It can (and should) reference the special `bool` variable `negation`, and should -evaluate to the description of the matcher when `negation` is `false`, or that -of the matcher's negation when `negation` is `true`. - -For convenience, we allow the description string to be empty (`""`), in which -case gMock will use the sequence of words in the matcher name as the -description. - -For example: - -```cpp -MATCHER(IsDivisibleBy7, "") { return (arg % 7) == 0; } -``` - -allows you to write - -```cpp - // Expects mock_foo.Bar(n) to be called where n is divisible by 7. - EXPECT_CALL(mock_foo, Bar(IsDivisibleBy7())); -``` - -or, - -```cpp - using ::testing::Not; - ... - // Verifies that a value is divisible by 7 and the other is not. - EXPECT_THAT(some_expression, IsDivisibleBy7()); - EXPECT_THAT(some_other_expression, Not(IsDivisibleBy7())); -``` - -If the above assertions fail, they will print something like: - -```shell - Value of: some_expression - Expected: is divisible by 7 - Actual: 27 - ... - Value of: some_other_expression - Expected: not (is divisible by 7) - Actual: 21 -``` - -where the descriptions `"is divisible by 7"` and `"not (is divisible by 7)"` are -automatically calculated from the matcher name `IsDivisibleBy7`. - -As you may have noticed, the auto-generated descriptions (especially those for -the negation) may not be so great. You can always override them with a `string` -expression of your own: - -```cpp -MATCHER(IsDivisibleBy7, - absl::StrCat(negation ? "isn't" : "is", " divisible by 7")) { - return (arg % 7) == 0; -} -``` - -Optionally, you can stream additional information to a hidden argument named -`result_listener` to explain the match result. 
For example, a better definition -of `IsDivisibleBy7` is: - -```cpp -MATCHER(IsDivisibleBy7, "") { - if ((arg % 7) == 0) - return true; - - *result_listener << "the remainder is " << (arg % 7); - return false; -} -``` - -With this definition, the above assertion will give a better message: - -```shell - Value of: some_expression - Expected: is divisible by 7 - Actual: 27 (the remainder is 6) -``` - -You should let `MatchAndExplain()` print *any additional information* that can -help a user understand the match result. Note that it should explain why the -match succeeds in case of a success (unless it's obvious) - this is useful when -the matcher is used inside `Not()`. There is no need to print the argument value -itself, as gMock already prints it for you. - -{: .callout .note} -NOTE: The type of the value being matched (`arg_type`) is determined by the -context in which you use the matcher and is supplied to you by the compiler, so -you don't need to worry about declaring it (nor can you). This allows the -matcher to be polymorphic. For example, `IsDivisibleBy7()` can be used to match -any type where the value of `(arg % 7) == 0` can be implicitly converted to a -`bool`. In the `Bar(IsDivisibleBy7())` example above, if method `Bar()` takes an -`int`, `arg_type` will be `int`; if it takes an `unsigned long`, `arg_type` will -be `unsigned long`; and so on. - -### Writing New Parameterized Matchers Quickly - -Sometimes you'll want to define a matcher that has parameters. For that you can -use the macro: - -```cpp -MATCHER_P(name, param_name, description_string) { statements; } -``` - -where the description string can be either `""` or a `string` expression that -references `negation` and `param_name`. - -For example: - -```cpp -MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } -``` - -will allow you to write: - -```cpp - EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); -``` - -which may lead to this message (assuming `n` is 10): - -```shell - Value of: Blah("a") - Expected: has absolute value 10 - Actual: -9 -``` - -Note that both the matcher description and its parameter are printed, making the -message human-friendly. - -In the matcher definition body, you can write `foo_type` to reference the type -of a parameter named `foo`. For example, in the body of -`MATCHER_P(HasAbsoluteValue, value)` above, you can write `value_type` to refer -to the type of `value`. - -gMock also provides `MATCHER_P2`, `MATCHER_P3`, ..., up to `MATCHER_P10` to -support multi-parameter matchers: - -```cpp -MATCHER_Pk(name, param_1, ..., param_k, description_string) { statements; } -``` - -Please note that the custom description string is for a particular *instance* of -the matcher, where the parameters have been bound to actual values. Therefore -usually you'll want the parameter values to be part of the description. gMock -lets you do that by referencing the matcher parameters in the description string -expression. - -For example, - -```cpp -using ::testing::PrintToString; -MATCHER_P2(InClosedRange, low, hi, - absl::StrFormat("%s in range [%s, %s]", negation ? "isn't" : "is", - PrintToString(low), PrintToString(hi))) { - return low <= arg && arg <= hi; -} -... -EXPECT_THAT(3, InClosedRange(4, 6)); -``` - -would generate a failure that contains the message: - -```shell - Expected: is in range [4, 6] -``` - -If you specify `""` as the description, the failure message will contain the -sequence of words in the matcher name followed by the parameter values printed -as a tuple. 
For example,

```cpp
  MATCHER_P2(InClosedRange, low, hi, "") { ... }
  ...
  EXPECT_THAT(3, InClosedRange(4, 6));
```

would generate a failure that contains the text:

```shell
  Expected: in closed range (4, 6)
```

For the purpose of typing, you can view

```cpp
MATCHER_Pk(name, p1, ..., pk, description_string) { ... }
```

as shorthand for

```cpp
template <typename p1_type, ..., typename pk_type>
FooMatcherPk<p1_type, ..., pk_type>
Foo(p1_type p1, ..., pk_type pk) { ... }
```

When you write `Foo(v1, ..., vk)`, the compiler infers the types of the
parameters `v1`, ..., and `vk` for you. If you are not happy with the result of
the type inference, you can specify the types by explicitly instantiating the
template, as in `Foo<long, bool>(5, false)`. As said earlier, you don't get to
(or need to) specify `arg_type` as that's determined by the context in which the
matcher is used.

You can assign the result of expression `Foo(p1, ..., pk)` to a variable of type
`FooMatcherPk<p1_type, ..., pk_type>`. This can be useful when composing
matchers. Matchers that don't have a parameter or have only one parameter have
special types: you can assign `Foo()` to a `FooMatcher`-typed variable, and
assign `Foo(p)` to a `FooMatcherP<p_type>`-typed variable.

While you can instantiate a matcher template with reference types, passing the
parameters by pointer usually makes your code more readable. If, however, you
still want to pass a parameter by reference, be aware that in the failure
message generated by the matcher you will see the value of the referenced object
but not its address.

You can overload matchers with different numbers of parameters:

```cpp
MATCHER_P(Blah, a, description_string_1) { ... }
MATCHER_P2(Blah, a, b, description_string_2) { ... }
```

While it's tempting to always use the `MATCHER*` macros when defining a new
matcher, you should also consider implementing the matcher interface directly
instead (see the recipes that follow), especially if you need to use the matcher
a lot. While these approaches require more work, they give you more control over
the types of the value being matched and the matcher parameters, which in
general leads to better compiler error messages that pay off in the long run.
They also allow overloading matchers based on parameter types (as opposed to
just based on the number of parameters).

### Writing New Monomorphic Matchers

A matcher of argument type `T` implements the matcher interface for `T` and does
two things: it tests whether a value of type `T` matches the matcher, and can
describe what kind of values it matches. The latter ability is used for
generating readable error messages when expectations are violated.

A matcher of `T` must declare a typedef like:

```cpp
using is_gtest_matcher = void;
```

and supports the following operations:

```cpp
// Match a value and optionally explain into an ostream.
bool matched = matcher.MatchAndExplain(value, maybe_os);
// where `value` is of type `T` and
// `maybe_os` is of type `std::ostream*`, where it can be null if the caller
// is not interested in the textual explanation.

matcher.DescribeTo(os);
matcher.DescribeNegationTo(os);
// where `os` is of type `std::ostream*`.
-``` - -If you need a custom matcher but `Truly()` is not a good option (for example, -you may not be happy with the way `Truly(predicate)` describes itself, or you -may want your matcher to be polymorphic as `Eq(value)` is), you can define a -matcher to do whatever you want in two steps: first implement the matcher -interface, and then define a factory function to create a matcher instance. The -second step is not strictly needed but it makes the syntax of using the matcher -nicer. - -For example, you can define a matcher to test whether an `int` is divisible by 7 -and then use it like this: - -```cpp -using ::testing::Matcher; - -class DivisibleBy7Matcher { - public: - using is_gtest_matcher = void; - - bool MatchAndExplain(int n, std::ostream*) const { - return (n % 7) == 0; - } - - void DescribeTo(std::ostream* os) const { - *os << "is divisible by 7"; - } - - void DescribeNegationTo(std::ostream* os) const { - *os << "is not divisible by 7"; - } -}; - -Matcher DivisibleBy7() { - return DivisibleBy7Matcher(); -} - -... - EXPECT_CALL(foo, Bar(DivisibleBy7())); -``` - -You may improve the matcher message by streaming additional information to the -`os` argument in `MatchAndExplain()`: - -```cpp -class DivisibleBy7Matcher { - public: - bool MatchAndExplain(int n, std::ostream* os) const { - const int remainder = n % 7; - if (remainder != 0 && os != nullptr) { - *os << "the remainder is " << remainder; - } - return remainder == 0; - } - ... -}; -``` - -Then, `EXPECT_THAT(x, DivisibleBy7());` may generate a message like this: - -```shell -Value of: x -Expected: is divisible by 7 - Actual: 23 (the remainder is 2) -``` - -{: .callout .tip} -Tip: for convenience, `MatchAndExplain()` can take a `MatchResultListener*` -instead of `std::ostream*`. - -### Writing New Polymorphic Matchers - -Expanding what we learned above to *polymorphic* matchers is now just as simple -as adding templates in the right place. - -```cpp - -class NotNullMatcher { - public: - using is_gtest_matcher = void; - - // To implement a polymorphic matcher, we just need to make MatchAndExplain a - // template on its first argument. - - // In this example, we want to use NotNull() with any pointer, so - // MatchAndExplain() accepts a pointer of any type as its first argument. - // In general, you can define MatchAndExplain() as an ordinary method or - // a method template, or even overload it. - template - bool MatchAndExplain(T* p, std::ostream*) const { - return p != nullptr; - } - - // Describes the property of a value matching this matcher. - void DescribeTo(std::ostream* os) const { *os << "is not NULL"; } - - // Describes the property of a value NOT matching this matcher. - void DescribeNegationTo(std::ostream* os) const { *os << "is NULL"; } -}; - -NotNullMatcher NotNull() { - return NotNullMatcher(); -} - -... - - EXPECT_CALL(foo, Bar(NotNull())); // The argument must be a non-NULL pointer. -``` - -### Legacy Matcher Implementation - -Defining matchers used to be somewhat more complicated, in which it required -several supporting classes and virtual functions. To implement a matcher for -type `T` using the legacy API you have to derive from `MatcherInterface` and -call `MakeMatcher` to construct the object. - -The interface looks like this: - -```cpp -class MatchResultListener { - public: - ... - // Streams x to the underlying ostream; does nothing if the ostream - // is NULL. - template - MatchResultListener& operator<<(const T& x); - - // Returns the underlying ostream. 
- std::ostream* stream(); -}; - -template -class MatcherInterface { - public: - virtual ~MatcherInterface(); - - // Returns true if and only if the matcher matches x; also explains the match - // result to 'listener'. - virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0; - - // Describes this matcher to an ostream. - virtual void DescribeTo(std::ostream* os) const = 0; - - // Describes the negation of this matcher to an ostream. - virtual void DescribeNegationTo(std::ostream* os) const; -}; -``` - -Fortunately, most of the time you can define a polymorphic matcher easily with -the help of `MakePolymorphicMatcher()`. Here's how you can define `NotNull()` as -an example: - -```cpp -using ::testing::MakePolymorphicMatcher; -using ::testing::MatchResultListener; -using ::testing::PolymorphicMatcher; - -class NotNullMatcher { - public: - // To implement a polymorphic matcher, first define a COPYABLE class - // that has three members MatchAndExplain(), DescribeTo(), and - // DescribeNegationTo(), like the following. - - // In this example, we want to use NotNull() with any pointer, so - // MatchAndExplain() accepts a pointer of any type as its first argument. - // In general, you can define MatchAndExplain() as an ordinary method or - // a method template, or even overload it. - template - bool MatchAndExplain(T* p, - MatchResultListener* /* listener */) const { - return p != NULL; - } - - // Describes the property of a value matching this matcher. - void DescribeTo(std::ostream* os) const { *os << "is not NULL"; } - - // Describes the property of a value NOT matching this matcher. - void DescribeNegationTo(std::ostream* os) const { *os << "is NULL"; } -}; - -// To construct a polymorphic matcher, pass an instance of the class -// to MakePolymorphicMatcher(). Note the return type. -PolymorphicMatcher NotNull() { - return MakePolymorphicMatcher(NotNullMatcher()); -} - -... - - EXPECT_CALL(foo, Bar(NotNull())); // The argument must be a non-NULL pointer. -``` - -{: .callout .note} -**Note:** Your polymorphic matcher class does **not** need to inherit from -`MatcherInterface` or any other class, and its methods do **not** need to be -virtual. - -Like in a monomorphic matcher, you may explain the match result by streaming -additional information to the `listener` argument in `MatchAndExplain()`. - -### Writing New Cardinalities - -A cardinality is used in `Times()` to tell gMock how many times you expect a -call to occur. It doesn't have to be exact. For example, you can say -`AtLeast(5)` or `Between(2, 4)`. - -If the [built-in set](gmock_cheat_sheet.md#CardinalityList) of cardinalities -doesn't suit you, you are free to define your own by implementing the following -interface (in namespace `testing`): - -```cpp -class CardinalityInterface { - public: - virtual ~CardinalityInterface(); - - // Returns true if and only if call_count calls will satisfy this cardinality. - virtual bool IsSatisfiedByCallCount(int call_count) const = 0; - - // Returns true if and only if call_count calls will saturate this - // cardinality. - virtual bool IsSaturatedByCallCount(int call_count) const = 0; - - // Describes self to an ostream. 
  virtual void DescribeTo(std::ostream* os) const = 0;
};
```

For example, to specify that a call must occur an even number of times, you can
write

```cpp
using ::testing::Cardinality;
using ::testing::CardinalityInterface;
using ::testing::MakeCardinality;

class EvenNumberCardinality : public CardinalityInterface {
 public:
  bool IsSatisfiedByCallCount(int call_count) const override {
    return (call_count % 2) == 0;
  }

  bool IsSaturatedByCallCount(int call_count) const override {
    return false;
  }

  void DescribeTo(std::ostream* os) const override {
    *os << "called even number of times";
  }
};

Cardinality EvenNumber() {
  return MakeCardinality(new EvenNumberCardinality);
}

...
  EXPECT_CALL(foo, Bar(3))
      .Times(EvenNumber());
```

### Writing New Actions {#QuickNewActions}

If the built-in actions don't work for you, you can easily define your own.
All you need is a call operator with a signature compatible with the mocked
function. So you can use a lambda:

```cpp
MockFunction<int(int)> mock;
EXPECT_CALL(mock, Call).WillOnce([](const int input) { return input * 7; });
EXPECT_EQ(14, mock.AsStdFunction()(2));
```

Or a struct with a call operator (even a templated one):

```cpp
struct MultiplyBy {
  template <typename T>
  T operator()(T arg) { return arg * multiplier; }

  int multiplier;
};

// Then use:
// EXPECT_CALL(...).WillOnce(MultiplyBy{7});
```

It's also fine for the callable to take no arguments, ignoring the arguments
supplied to the mock function:

```cpp
MockFunction<int(int)> mock;
EXPECT_CALL(mock, Call).WillOnce([] { return 17; });
EXPECT_EQ(17, mock.AsStdFunction()(0));
```

When used with `WillOnce`, the callable can assume it will be called at most
once and is allowed to be a move-only type:

```cpp
// An action that contains move-only types and has an &&-qualified operator,
// demanding in the type system that it be called at most once. This can be
// used with WillOnce, but the compiler will reject it if handed to
// WillRepeatedly.
struct MoveOnlyAction {
  std::unique_ptr<int> move_only_state;
  std::unique_ptr<int> operator()() && { return std::move(move_only_state); }
};

MockFunction<std::unique_ptr<int>()> mock;
EXPECT_CALL(mock, Call).WillOnce(MoveOnlyAction{std::make_unique<int>(17)});
EXPECT_THAT(mock.AsStdFunction()(), Pointee(Eq(17)));
```

More generally, to use with a mock function whose signature is `R(Args...)` the
object can be anything convertible to `OnceAction<R(Args...)>` or
`Action<R(Args...)>`. The difference between the two is that `OnceAction` has
weaker requirements (`Action` requires a copy-constructible input that can be
called repeatedly whereas `OnceAction` requires only move-constructible and
supports `&&`-qualified call operators), but can be used only with `WillOnce`.
`OnceAction` is typically relevant only when supporting move-only types or
actions that want a type-system guarantee that they will be called at most once.

Typically the `OnceAction` and `Action` templates need not be referenced
directly in your actions: a struct or class with a call operator is sufficient,
as in the examples above. But fancier polymorphic actions that need to know the
specific return type of the mock function can define templated conversion
operators to make that possible. See `gmock-actions.h` for examples.

#### Legacy macro-based Actions

Before C++11, the functor-based actions were not supported; the old way of
writing actions was through a set of `ACTION*` macros.
We suggest avoiding them -in new code; they hide a lot of logic behind the macro, potentially leading to -harder-to-understand compiler errors. Nevertheless, we cover them here for -completeness. - -By writing - -```cpp -ACTION(name) { statements; } -``` - -in a namespace scope (i.e. not inside a class or function), you will define an -action with the given name that executes the statements. The value returned by -`statements` will be used as the return value of the action. Inside the -statements, you can refer to the K-th (0-based) argument of the mock function as -`argK`. For example: - -```cpp -ACTION(IncrementArg1) { return ++(*arg1); } -``` - -allows you to write - -```cpp -... WillOnce(IncrementArg1()); -``` - -Note that you don't need to specify the types of the mock function arguments. -Rest assured that your code is type-safe though: you'll get a compiler error if -`*arg1` doesn't support the `++` operator, or if the type of `++(*arg1)` isn't -compatible with the mock function's return type. - -Another example: - -```cpp -ACTION(Foo) { - (*arg2)(5); - Blah(); - *arg1 = 0; - return arg0; -} -``` - -defines an action `Foo()` that invokes argument #2 (a function pointer) with 5, -calls function `Blah()`, sets the value pointed to by argument #1 to 0, and -returns argument #0. - -For more convenience and flexibility, you can also use the following pre-defined -symbols in the body of `ACTION`: - -`argK_type` | The type of the K-th (0-based) argument of the mock function -:-------------- | :----------------------------------------------------------- -`args` | All arguments of the mock function as a tuple -`args_type` | The type of all arguments of the mock function as a tuple -`return_type` | The return type of the mock function -`function_type` | The type of the mock function - -For example, when using an `ACTION` as a stub action for a mock function: - -```cpp -int DoSomething(bool flag, int* ptr); -``` - -we have: - -Pre-defined Symbol | Is Bound To ------------------- | --------------------------------- -`arg0` | the value of `flag` -`arg0_type` | the type `bool` -`arg1` | the value of `ptr` -`arg1_type` | the type `int*` -`args` | the tuple `(flag, ptr)` -`args_type` | the type `std::tuple<bool, int*>` -`return_type` | the type `int` -`function_type` | the type `int(bool, int*)` - -#### Legacy macro-based parameterized Actions - -Sometimes you'll want to parameterize an action you define. For that we have -another macro - -```cpp -ACTION_P(name, param) { statements; } -``` - -For example, - -```cpp -ACTION_P(Add, n) { return arg0 + n; } -``` - -will allow you to write - -```cpp -// Returns argument #0 + 5. -... WillOnce(Add(5)); -``` - -For convenience, we use the term *arguments* for the values used to invoke the -mock function, and the term *parameters* for the values used to instantiate an -action. - -Note that you don't need to provide the type of the parameter either. Suppose -the parameter is named `param`; you can also use the gMock-defined symbol -`param_type` to refer to the type of the parameter as inferred by the compiler. -For example, in the body of `ACTION_P(Add, n)` above, you can write `n_type` for -the type of `n`. - -gMock also provides `ACTION_P2`, `ACTION_P3`, etc. to support multi-parameter -actions. For example, - -```cpp -ACTION_P2(ReturnDistanceTo, x, y) { - double dx = arg0 - x; - double dy = arg1 - y; - return sqrt(dx*dx + dy*dy); -} -``` - -lets you write - -```cpp -... 
WillOnce(ReturnDistanceTo(5.0, 26.5)); -``` - -You can view `ACTION` as a degenerate parameterized action where the number of -parameters is 0. - -You can also easily define actions overloaded on the number of parameters: - -```cpp -ACTION_P(Plus, a) { ... } -ACTION_P2(Plus, a, b) { ... } -``` - -### Restricting the Type of an Argument or Parameter in an ACTION - -For maximum brevity and reusability, the `ACTION*` macros don't ask you to -provide the types of the mock function arguments and the action parameters. -Instead, we let the compiler infer the types for us. - -Sometimes, however, we may want to be more explicit about the types. There are -several tricks to do that. For example: - -```cpp -ACTION(Foo) { - // Makes sure arg0 can be converted to int. - int n = arg0; - ... use n instead of arg0 here ... -} - -ACTION_P(Bar, param) { - // Makes sure the type of arg1 is const char*. - ::testing::StaticAssertTypeEq<const char*, arg1_type>(); - - // Makes sure param can be converted to bool. - bool flag = param; -} -``` - -where `StaticAssertTypeEq` is a compile-time assertion in googletest that -verifies two types are the same. - -### Writing New Action Templates Quickly - -Sometimes you want to give an action explicit template parameters that cannot be -inferred from its value parameters. `ACTION_TEMPLATE()` supports that and can be -viewed as an extension to `ACTION()` and `ACTION_P*()`. - -The syntax: - -```cpp -ACTION_TEMPLATE(ActionName, - HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), - AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } -``` - -defines an action template that takes *m* explicit template parameters and *n* -value parameters, where *m* is in [1, 10] and *n* is in [0, 10]. `name_i` is the -name of the *i*-th template parameter, and `kind_i` specifies whether it's a -`typename`, an integral constant, or a template. `p_i` is the name of the *i*-th -value parameter. - -Example: - -```cpp -// DuplicateArg<k, T>(output) converts the k-th argument of the mock -// function to type T and copies it to *output. -ACTION_TEMPLATE(DuplicateArg, - // Note the comma between int and k: - HAS_2_TEMPLATE_PARAMS(int, k, typename, T), - AND_1_VALUE_PARAMS(output)) { - *output = T(std::get<k>(args)); -} -``` - -To create an instance of an action template, write: - -```cpp -ActionName<t1, ..., t_m>(v1, ..., v_n) -``` - -where the `t`s are the template arguments and the `v`s are the value arguments. -The value argument types are inferred by the compiler. For example: - -```cpp -using ::testing::_; -... - int n; - EXPECT_CALL(mock, Foo).WillOnce(DuplicateArg<1, unsigned char>(&n)); -``` - -If you want to explicitly specify the value argument types, you can provide -additional template arguments: - -```cpp -ActionName<t1, ..., t_m, u1, ..., u_k>(v1, ..., v_n) -``` - -where `u_i` is the desired type of `v_i`. - -`ACTION_TEMPLATE` and `ACTION`/`ACTION_P*` can be overloaded on the number of -value parameters, but not on the number of template parameters. Without the -restriction, the meaning of the following is unclear: - -```cpp - OverloadedAction<bool>(x); -``` - -Are we using a single-template-parameter action where `bool` refers to the type -of `x`, or a two-template-parameter action where the compiler is asked to infer -the type of `x`? - -### Using the ACTION Object's Type - -If you are writing a function that returns an `ACTION` object, you'll need to -know its type. The type depends on the macro used to define the action and the -parameter types. 
The rule is relatively simple: - - -| Given Definition | Expression | Has Type | -| ----------------------------- | ------------------- | --------------------- | -| `ACTION(Foo)` | `Foo()` | `FooAction` | -| `ACTION_TEMPLATE(Foo, HAS_m_TEMPLATE_PARAMS(...), AND_0_VALUE_PARAMS())` | `Foo<t1, ..., t_m>()` | `FooAction<t1, ..., t_m>` | -| `ACTION_P(Bar, param)` | `Bar(int_value)` | `BarActionP<int>` | -| `ACTION_TEMPLATE(Bar, HAS_m_TEMPLATE_PARAMS(...), AND_1_VALUE_PARAMS(p1))` | `Bar<t1, ..., t_m>(int_value)` | `BarActionP<t1, ..., t_m, int>` | -| `ACTION_P2(Baz, p1, p2)` | `Baz(bool_value, int_value)` | `BazActionP2<bool, int>` | -| `ACTION_TEMPLATE(Baz, HAS_m_TEMPLATE_PARAMS(...), AND_2_VALUE_PARAMS(p1, p2))` | `Baz<t1, ..., t_m>(bool_value, int_value)` | `BazActionP2<t1, ..., t_m, bool, int>` | -| ... | ... | ... | - - -Note that we have to pick different suffixes (`Action`, `ActionP`, `ActionP2`, -etc.) for actions with different numbers of value parameters, or the action -definitions cannot be overloaded on the number of them. - -### Writing New Monomorphic Actions {#NewMonoActions} - -While the `ACTION*` macros are very convenient, sometimes they are -inappropriate. For example, despite the tricks shown in the previous recipes, -they don't let you directly specify the types of the mock function arguments and -the action parameters, which in general leads to unhelpful compiler error -messages that can baffle unfamiliar users. They also don't allow overloading -actions based on parameter types without jumping through some hoops. - -An alternative to the `ACTION*` macros is to implement -`::testing::ActionInterface<F>`, where `F` is the type of the mock function in -which the action will be used. For example: - -```cpp -template <typename F> -class ActionInterface { - public: - virtual ~ActionInterface(); - - // Performs the action. Result is the return type of function type - // F, and ArgumentTuple is the tuple of arguments of F. - // - - // For example, if F is int(bool, const string&), then Result would - // be int, and ArgumentTuple would be std::tuple<bool, const string&>. - virtual Result Perform(const ArgumentTuple& args) = 0; -}; -``` - -```cpp -using ::testing::_; -using ::testing::Action; -using ::testing::ActionInterface; -using ::testing::MakeAction; - -typedef int IncrementMethod(int*); - -class IncrementArgumentAction : public ActionInterface<IncrementMethod> { - public: - int Perform(const std::tuple<int*>& args) override { - int* p = std::get<0>(args); // Grabs the first argument. - return (*p)++; // Increments the pointed-to value; returns the old value. - } -}; - -Action<IncrementMethod> IncrementArgument() { - return MakeAction(new IncrementArgumentAction); -} - -... - EXPECT_CALL(foo, Baz(_)) - .WillOnce(IncrementArgument()); - - int n = 5; - foo.Baz(&n); // Should return 5 and change n to 6. -``` - -### Writing New Polymorphic Actions {#NewPolyActions} - -The previous recipe showed you how to define your own action. This is all good, -except that you need to know the type of the function in which the action will -be used. Sometimes that can be a problem. For example, if you want to use the -action in functions with *different* types (e.g. `Return()` and -`SetArgPointee()`). - -If an action can be used in several types of mock functions, we say it's -*polymorphic*. The `MakePolymorphicAction()` function template makes it easy to -define such an action: - -```cpp -namespace testing { -template <typename Impl> -PolymorphicAction<Impl> MakePolymorphicAction(const Impl& impl); -} // namespace testing -``` - -As an example, let's define an action that returns the second argument in the -mock function's argument list. 
The first step is to define an implementation -class: - -```cpp -class ReturnSecondArgumentAction { - public: - template <typename Result, typename ArgumentTuple> - Result Perform(const ArgumentTuple& args) const { - // To get the i-th (0-based) argument, use std::get<i>(args). - return std::get<1>(args); - } -}; -``` - -This implementation class does *not* need to inherit from any particular class. -What matters is that it must have a `Perform()` method template. This method -template takes the mock function's arguments as a tuple in a **single** -argument, and returns the result of the action. It can be either `const` or not, -but must be invocable with exactly one template argument, which is the result -type. In other words, you must be able to call `Perform<R>(args)` where `R` is -the mock function's return type and `args` is its arguments in a tuple. - -Next, we use `MakePolymorphicAction()` to turn an instance of the implementation -class into the polymorphic action we need. It will be convenient to have a -wrapper for this: - -```cpp -using ::testing::MakePolymorphicAction; -using ::testing::PolymorphicAction; - -PolymorphicAction<ReturnSecondArgumentAction> ReturnSecondArgument() { - return MakePolymorphicAction(ReturnSecondArgumentAction()); -} -``` - -Now, you can use this polymorphic action the same way you use the built-in ones: - -```cpp -using ::testing::_; - -class MockFoo : public Foo { - public: - MOCK_METHOD(int, DoThis, (bool flag, int n), (override)); - MOCK_METHOD(string, DoThat, (int x, const char* str1, const char* str2), - (override)); -}; - - ... - MockFoo foo; - EXPECT_CALL(foo, DoThis).WillOnce(ReturnSecondArgument()); - EXPECT_CALL(foo, DoThat).WillOnce(ReturnSecondArgument()); - ... - foo.DoThis(true, 5); // Will return 5. - foo.DoThat(1, "Hi", "Bye"); // Will return "Hi". -``` - -### Teaching gMock How to Print Your Values - -When an uninteresting or unexpected call occurs, gMock prints the argument -values and the stack trace to help you debug. Assertion macros like -`EXPECT_THAT` and `EXPECT_EQ` also print the values in question when the -assertion fails. gMock and googletest do this using googletest's user-extensible -value printer. - -This printer knows how to print built-in C++ types, native arrays, STL -containers, and any type that supports the `<<` operator. For other types, it -prints the raw bytes in the value and hopes that you, the user, can figure it out. -[The GoogleTest advanced guide](advanced.md#teaching-googletest-how-to-print-your-values) -explains how to extend the printer to do a better job at printing your -particular type than to dump the bytes. - -## Useful Mocks Created Using gMock - - - - -### Mock std::function {#MockFunction} - -`std::function` is a general function type introduced in C++11. It is a -preferred way of passing callbacks to new interfaces. Functions are copyable, -and are not usually passed around by pointer, which makes them tricky to mock. -But fear not - `MockFunction` can help you with that. - -`MockFunction<R(T1, ..., Tn)>` has a mock method `Call()` with the signature: - -```cpp - R Call(T1, ..., Tn); -``` - -It also has an `AsStdFunction()` method, which creates a `std::function` proxy -forwarding to Call: - -```cpp - std::function<R(T1, ..., Tn)> AsStdFunction(); -``` - -To use `MockFunction`, first create a `MockFunction` object and set up -expectations on its `Call` method. Then pass the proxy obtained from -`AsStdFunction()` to the code you are testing. For example: - -```cpp -TEST(FooTest, RunsCallbackWithBarArgument) { - // 1. Create a mock object. - MockFunction<int(string)> mock_function; - - // 2. 
Set expectations on Call() method. - EXPECT_CALL(mock_function, Call("bar")).WillOnce(Return(1)); - - // 3. Exercise code that uses std::function. - Foo(mock_function.AsStdFunction()); - // Foo's signature can be either of: - // void Foo(const std::function& fun); - // void Foo(std::function fun); - - // 4. All expectations will be verified when mock_function - // goes out of scope and is destroyed. -} -``` - -Remember that function objects created with `AsStdFunction()` are just -forwarders. If you create multiple of them, they will share the same set of -expectations. - -Although `std::function` supports unlimited number of arguments, `MockFunction` -implementation is limited to ten. If you ever hit that limit... well, your -callback has bigger problems than being mockable. :-) diff --git a/3rdparty/googletest-1.13.0/docs/gmock_faq.md b/3rdparty/googletest-1.13.0/docs/gmock_faq.md deleted file mode 100644 index 8f220bf7a8fec033ed9cb827a794397315962fcc..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/gmock_faq.md +++ /dev/null @@ -1,390 +0,0 @@ -# Legacy gMock FAQ - -### When I call a method on my mock object, the method for the real object is invoked instead. What's the problem? - -In order for a method to be mocked, it must be *virtual*, unless you use the -[high-perf dependency injection technique](gmock_cook_book.md#MockingNonVirtualMethods). - -### Can I mock a variadic function? - -You cannot mock a variadic function (i.e. a function taking ellipsis (`...`) -arguments) directly in gMock. - -The problem is that in general, there is *no way* for a mock object to know how -many arguments are passed to the variadic method, and what the arguments' types -are. Only the *author of the base class* knows the protocol, and we cannot look -into his or her head. - -Therefore, to mock such a function, the *user* must teach the mock object how to -figure out the number of arguments and their types. One way to do it is to -provide overloaded versions of the function. - -Ellipsis arguments are inherited from C and not really a C++ feature. They are -unsafe to use and don't work with arguments that have constructors or -destructors. Therefore we recommend to avoid them in C++ as much as possible. - -### MSVC gives me warning C4301 or C4373 when I define a mock method with a const parameter. Why? - -If you compile this using Microsoft Visual C++ 2005 SP1: - -```cpp -class Foo { - ... - virtual void Bar(const int i) = 0; -}; - -class MockFoo : public Foo { - ... - MOCK_METHOD(void, Bar, (const int i), (override)); -}; -``` - -You may get the following warning: - -```shell -warning C4301: 'MockFoo::Bar': overriding virtual function only differs from 'Foo::Bar' by const/volatile qualifier -``` - -This is a MSVC bug. The same code compiles fine with gcc, for example. If you -use Visual C++ 2008 SP1, you would get the warning: - -```shell -warning C4373: 'MockFoo::Bar': virtual function overrides 'Foo::Bar', previous versions of the compiler did not override when parameters only differed by const/volatile qualifiers -``` - -In C++, if you *declare* a function with a `const` parameter, the `const` -modifier is ignored. Therefore, the `Foo` base class above is equivalent to: - -```cpp -class Foo { - ... - virtual void Bar(int i) = 0; // int or const int? Makes no difference. -}; -``` - -In fact, you can *declare* `Bar()` with an `int` parameter, and define it with a -`const int` parameter. The compiler will still match them up. 
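
For instance, here is a minimal sketch of that declaration/definition pattern (the free function `Process` is a made-up name used only for illustration, not part of the FAQ's `Foo` example):

```cpp
// Declaration: no top-level const on the parameter.
void Process(int n);

// Definition: the added top-level const is purely an implementation detail;
// as far as callers and overload resolution are concerned, this still
// defines void Process(int).
void Process(const int n) {
  // Inside the body, n cannot be reassigned, but the signature is unchanged.
}
```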
- -Since making a parameter `const` is meaningless in the method declaration, we -recommend to remove it in both `Foo` and `MockFoo`. That should workaround the -VC bug. - -Note that we are talking about the *top-level* `const` modifier here. If the -function parameter is passed by pointer or reference, declaring the pointee or -referee as `const` is still meaningful. For example, the following two -declarations are *not* equivalent: - -```cpp -void Bar(int* p); // Neither p nor *p is const. -void Bar(const int* p); // p is not const, but *p is. -``` - -### I can't figure out why gMock thinks my expectations are not satisfied. What should I do? - -You might want to run your test with `--gmock_verbose=info`. This flag lets -gMock print a trace of every mock function call it receives. By studying the -trace, you'll gain insights on why the expectations you set are not met. - -If you see the message "The mock function has no default action set, and its -return type has no default value set.", then try -[adding a default action](gmock_cheat_sheet.md#OnCall). Due to a known issue, -unexpected calls on mocks without default actions don't print out a detailed -comparison between the actual arguments and the expected arguments. - -### My program crashed and `ScopedMockLog` spit out tons of messages. Is it a gMock bug? - -gMock and `ScopedMockLog` are likely doing the right thing here. - -When a test crashes, the failure signal handler will try to log a lot of -information (the stack trace, and the address map, for example). The messages -are compounded if you have many threads with depth stacks. When `ScopedMockLog` -intercepts these messages and finds that they don't match any expectations, it -prints an error for each of them. - -You can learn to ignore the errors, or you can rewrite your expectations to make -your test more robust, for example, by adding something like: - -```cpp -using ::testing::AnyNumber; -using ::testing::Not; -... - // Ignores any log not done by us. - EXPECT_CALL(log, Log(_, Not(EndsWith("/my_file.cc")), _)) - .Times(AnyNumber()); -``` - -### How can I assert that a function is NEVER called? - -```cpp -using ::testing::_; -... - EXPECT_CALL(foo, Bar(_)) - .Times(0); -``` - -### I have a failed test where gMock tells me TWICE that a particular expectation is not satisfied. Isn't this redundant? - -When gMock detects a failure, it prints relevant information (the mock function -arguments, the state of relevant expectations, and etc) to help the user debug. -If another failure is detected, gMock will do the same, including printing the -state of relevant expectations. - -Sometimes an expectation's state didn't change between two failures, and you'll -see the same description of the state twice. They are however *not* redundant, -as they refer to *different points in time*. The fact they are the same *is* -interesting information. - -### I get a heapcheck failure when using a mock object, but using a real object is fine. What can be wrong? - -Does the class (hopefully a pure interface) you are mocking have a virtual -destructor? - -Whenever you derive from a base class, make sure its destructor is virtual. -Otherwise Bad Things will happen. Consider the following code: - -```cpp -class Base { - public: - // Not virtual, but should be. - ~Base() { ... } - ... -}; - -class Derived : public Base { - public: - ... - private: - std::string value_; -}; - -... - Base* p = new Derived; - ... - delete p; // Surprise! 
~Base() will be called, but ~Derived() will not - // - value_ is leaked. -``` - -By changing `~Base()` to virtual, `~Derived()` will be correctly called when -`delete p` is executed, and the heap checker will be happy. - -### The "newer expectations override older ones" rule makes writing expectations awkward. Why does gMock do that? - -When people complain about this, often they are referring to code like: - -```cpp -using ::testing::Return; -... - // foo.Bar() should be called twice, return 1 the first time, and return - // 2 the second time. However, I have to write the expectations in the - // reverse order. This sucks big time!!! - EXPECT_CALL(foo, Bar()) - .WillOnce(Return(2)) - .RetiresOnSaturation(); - EXPECT_CALL(foo, Bar()) - .WillOnce(Return(1)) - .RetiresOnSaturation(); -``` - -The problem, is that they didn't pick the **best** way to express the test's -intent. - -By default, expectations don't have to be matched in *any* particular order. If -you want them to match in a certain order, you need to be explicit. This is -gMock's (and jMock's) fundamental philosophy: it's easy to accidentally -over-specify your tests, and we want to make it harder to do so. - -There are two better ways to write the test spec. You could either put the -expectations in sequence: - -```cpp -using ::testing::Return; -... - // foo.Bar() should be called twice, return 1 the first time, and return - // 2 the second time. Using a sequence, we can write the expectations - // in their natural order. - { - InSequence s; - EXPECT_CALL(foo, Bar()) - .WillOnce(Return(1)) - .RetiresOnSaturation(); - EXPECT_CALL(foo, Bar()) - .WillOnce(Return(2)) - .RetiresOnSaturation(); - } -``` - -or you can put the sequence of actions in the same expectation: - -```cpp -using ::testing::Return; -... - // foo.Bar() should be called twice, return 1 the first time, and return - // 2 the second time. - EXPECT_CALL(foo, Bar()) - .WillOnce(Return(1)) - .WillOnce(Return(2)) - .RetiresOnSaturation(); -``` - -Back to the original questions: why does gMock search the expectations (and -`ON_CALL`s) from back to front? Because this allows a user to set up a mock's -behavior for the common case early (e.g. in the mock's constructor or the test -fixture's set-up phase) and customize it with more specific rules later. If -gMock searches from front to back, this very useful pattern won't be possible. - -### gMock prints a warning when a function without EXPECT_CALL is called, even if I have set its behavior using ON_CALL. Would it be reasonable not to show the warning in this case? - -When choosing between being neat and being safe, we lean toward the latter. So -the answer is that we think it's better to show the warning. - -Often people write `ON_CALL`s in the mock object's constructor or `SetUp()`, as -the default behavior rarely changes from test to test. Then in the test body -they set the expectations, which are often different for each test. Having an -`ON_CALL` in the set-up part of a test doesn't mean that the calls are expected. -If there's no `EXPECT_CALL` and the method is called, it's possibly an error. If -we quietly let the call go through without notifying the user, bugs may creep in -unnoticed. - -If, however, you are sure that the calls are OK, you can write - -```cpp -using ::testing::_; -... - EXPECT_CALL(foo, Bar(_)) - .WillRepeatedly(...); -``` - -instead of - -```cpp -using ::testing::_; -... 
- ON_CALL(foo, Bar(_)) - .WillByDefault(...); -``` - -This tells gMock that you do expect the calls and no warning should be printed. - -Also, you can control the verbosity by specifying `--gmock_verbose=error`. Other -values are `info` and `warning`. If you find the output too noisy when -debugging, just choose a less verbose level. - -### How can I delete the mock function's argument in an action? - -If your mock function takes a pointer argument and you want to delete that -argument, you can use testing::DeleteArg() to delete the N'th (zero-indexed) -argument: - -```cpp -using ::testing::_; - ... - MOCK_METHOD(void, Bar, (X* x, const Y& y)); - ... - EXPECT_CALL(mock_foo_, Bar(_, _)) - .WillOnce(testing::DeleteArg<0>())); -``` - -### How can I perform an arbitrary action on a mock function's argument? - -If you find yourself needing to perform some action that's not supported by -gMock directly, remember that you can define your own actions using -[`MakeAction()`](#NewMonoActions) or -[`MakePolymorphicAction()`](#NewPolyActions), or you can write a stub function -and invoke it using [`Invoke()`](#FunctionsAsActions). - -```cpp -using ::testing::_; -using ::testing::Invoke; - ... - MOCK_METHOD(void, Bar, (X* p)); - ... - EXPECT_CALL(mock_foo_, Bar(_)) - .WillOnce(Invoke(MyAction(...))); -``` - -### My code calls a static/global function. Can I mock it? - -You can, but you need to make some changes. - -In general, if you find yourself needing to mock a static function, it's a sign -that your modules are too tightly coupled (and less flexible, less reusable, -less testable, etc). You are probably better off defining a small interface and -call the function through that interface, which then can be easily mocked. It's -a bit of work initially, but usually pays for itself quickly. - -This Google Testing Blog -[post](https://testing.googleblog.com/2008/06/defeat-static-cling.html) says it -excellently. Check it out. - -### My mock object needs to do complex stuff. It's a lot of pain to specify the actions. gMock sucks! - -I know it's not a question, but you get an answer for free any way. :-) - -With gMock, you can create mocks in C++ easily. And people might be tempted to -use them everywhere. Sometimes they work great, and sometimes you may find them, -well, a pain to use. So, what's wrong in the latter case? - -When you write a test without using mocks, you exercise the code and assert that -it returns the correct value or that the system is in an expected state. This is -sometimes called "state-based testing". - -Mocks are great for what some call "interaction-based" testing: instead of -checking the system state at the very end, mock objects verify that they are -invoked the right way and report an error as soon as it arises, giving you a -handle on the precise context in which the error was triggered. This is often -more effective and economical to do than state-based testing. - -If you are doing state-based testing and using a test double just to simulate -the real object, you are probably better off using a fake. Using a mock in this -case causes pain, as it's not a strong point for mocks to perform complex -actions. If you experience this and think that mocks suck, you are just not -using the right tool for your problem. Or, you might be trying to solve the -wrong problem. :-) - -### I got a warning "Uninteresting function call encountered - default action taken.." Should I panic? - -By all means, NO! It's just an FYI. 
:-) - -What it means is that you have a mock function, you haven't set any expectations -on it (by gMock's rule this means that you are not interested in calls to this -function and therefore it can be called any number of times), and it is called. -That's OK - you didn't say it's not OK to call the function! - -What if you actually meant to disallow this function to be called, but forgot to -write `EXPECT_CALL(foo, Bar()).Times(0)`? While one can argue that it's the -user's fault, gMock tries to be nice and prints you a note. - -So, when you see the message and believe that there shouldn't be any -uninteresting calls, you should investigate what's going on. To make your life -easier, gMock dumps the stack trace when an uninteresting call is encountered. -From that you can figure out which mock function it is, and how it is called. - -### I want to define a custom action. Should I use Invoke() or implement the ActionInterface interface? - -Either way is fine - you want to choose the one that's more convenient for your -circumstance. - -Usually, if your action is for a particular function type, defining it using -`Invoke()` should be easier; if your action can be used in functions of -different types (e.g. if you are defining `Return(*value*)`), -`MakePolymorphicAction()` is easiest. Sometimes you want precise control on what -types of functions the action can be used in, and implementing `ActionInterface` -is the way to go here. See the implementation of `Return()` in `gmock-actions.h` -for an example. - -### I use SetArgPointee() in WillOnce(), but gcc complains about "conflicting return type specified". What does it mean? - -You got this error as gMock has no idea what value it should return when the -mock method is called. `SetArgPointee()` says what the side effect is, but -doesn't say what the return value should be. You need `DoAll()` to chain a -`SetArgPointee()` with a `Return()` that provides a value appropriate to the API -being mocked. - -See this [recipe](gmock_cook_book.md#mocking-side-effects) for more details and -an example. - -### I have a huge mock class, and Microsoft Visual C++ runs out of memory when compiling it. What can I do? - -We've noticed that when the `/clr` compiler flag is used, Visual C++ uses 5~6 -times as much memory when compiling a mock class. We suggest to avoid `/clr` -when compiling native C++ mocks. diff --git a/3rdparty/googletest-1.13.0/docs/gmock_for_dummies.md b/3rdparty/googletest-1.13.0/docs/gmock_for_dummies.md deleted file mode 100644 index b7264d3587f71ada659741bd6c47ac015ff46e99..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/gmock_for_dummies.md +++ /dev/null @@ -1,700 +0,0 @@ -# gMock for Dummies - -## What Is gMock? - -When you write a prototype or test, often it's not feasible or wise to rely on -real objects entirely. A **mock object** implements the same interface as a real -object (so it can be used as one), but lets you specify at run time how it will -be used and what it should do (which methods will be called? in which order? how -many times? with what arguments? what will they return? etc). - -It is easy to confuse the term *fake objects* with mock objects. Fakes and mocks -actually mean very different things in the Test-Driven Development (TDD) -community: - -* **Fake** objects have working implementations, but usually take some - shortcut (perhaps to make the operations less expensive), which makes them - not suitable for production. An in-memory file system would be an example of - a fake. 
-* **Mocks** are objects pre-programmed with *expectations*, which form a - specification of the calls they are expected to receive. - -If all this seems too abstract for you, don't worry - the most important thing -to remember is that a mock allows you to check the *interaction* between itself -and code that uses it. The difference between fakes and mocks shall become much -clearer once you start to use mocks. - -**gMock** is a library (sometimes we also call it a "framework" to make it sound -cool) for creating mock classes and using them. It does to C++ what -jMock/EasyMock does to Java (well, more or less). - -When using gMock, - -1. first, you use some simple macros to describe the interface you want to - mock, and they will expand to the implementation of your mock class; -2. next, you create some mock objects and specify its expectations and behavior - using an intuitive syntax; -3. then you exercise code that uses the mock objects. gMock will catch any - violation to the expectations as soon as it arises. - -## Why gMock? - -While mock objects help you remove unnecessary dependencies in tests and make -them fast and reliable, using mocks manually in C++ is *hard*: - -* Someone has to implement the mocks. The job is usually tedious and - error-prone. No wonder people go great distance to avoid it. -* The quality of those manually written mocks is a bit, uh, unpredictable. You - may see some really polished ones, but you may also see some that were - hacked up in a hurry and have all sorts of ad hoc restrictions. -* The knowledge you gained from using one mock doesn't transfer to the next - one. - -In contrast, Java and Python programmers have some fine mock frameworks (jMock, -EasyMock, etc), which automate the creation of mocks. As a result, mocking is a -proven effective technique and widely adopted practice in those communities. -Having the right tool absolutely makes the difference. - -gMock was built to help C++ programmers. It was inspired by jMock and EasyMock, -but designed with C++'s specifics in mind. It is your friend if any of the -following problems is bothering you: - -* You are stuck with a sub-optimal design and wish you had done more - prototyping before it was too late, but prototyping in C++ is by no means - "rapid". -* Your tests are slow as they depend on too many libraries or use expensive - resources (e.g. a database). -* Your tests are brittle as some resources they use are unreliable (e.g. the - network). -* You want to test how your code handles a failure (e.g. a file checksum - error), but it's not easy to cause one. -* You need to make sure that your module interacts with other modules in the - right way, but it's hard to observe the interaction; therefore you resort to - observing the side effects at the end of the action, but it's awkward at - best. -* You want to "mock out" your dependencies, except that they don't have mock - implementations yet; and, frankly, you aren't thrilled by some of those - hand-written mocks. - -We encourage you to use gMock as - -* a *design* tool, for it lets you experiment with your interface design early - and often. More iterations lead to better designs! -* a *testing* tool to cut your tests' outbound dependencies and probe the - interaction between your module and its collaborators. - -## Getting Started - -gMock is bundled with googletest. - -## A Case for Mock Turtles - -Let's look at an example. 
Suppose you are developing a graphics program that -relies on a [LOGO](http://en.wikipedia.org/wiki/Logo_programming_language)-like -API for drawing. How would you test that it does the right thing? Well, you can -run it and compare the screen with a golden screen snapshot, but let's admit it: -tests like this are expensive to run and fragile (What if you just upgraded to a -shiny new graphics card that has better anti-aliasing? Suddenly you have to -update all your golden images.). It would be too painful if all your tests are -like this. Fortunately, you learned about -[Dependency Injection](http://en.wikipedia.org/wiki/Dependency_injection) and know the right thing -to do: instead of having your application talk to the system API directly, wrap -the API in an interface (say, `Turtle`) and code to that interface: - -```cpp -class Turtle { - ... - virtual ~Turtle() {} - virtual void PenUp() = 0; - virtual void PenDown() = 0; - virtual void Forward(int distance) = 0; - virtual void Turn(int degrees) = 0; - virtual void GoTo(int x, int y) = 0; - virtual int GetX() const = 0; - virtual int GetY() const = 0; -}; -``` - -(Note that the destructor of `Turtle` **must** be virtual, as is the case for -**all** classes you intend to inherit from - otherwise the destructor of the -derived class will not be called when you delete an object through a base -pointer, and you'll get corrupted program states like memory leaks.) - -You can control whether the turtle's movement will leave a trace using `PenUp()` -and `PenDown()`, and control its movement using `Forward()`, `Turn()`, and -`GoTo()`. Finally, `GetX()` and `GetY()` tell you the current position of the -turtle. - -Your program will normally use a real implementation of this interface. In -tests, you can use a mock implementation instead. This allows you to easily -check what drawing primitives your program is calling, with what arguments, and -in which order. Tests written this way are much more robust (they won't break -because your new machine does anti-aliasing differently), easier to read and -maintain (the intent of a test is expressed in the code, not in some binary -images), and run *much, much faster*. - -## Writing the Mock Class - -If you are lucky, the mocks you need to use have already been implemented by -some nice people. If, however, you find yourself in the position to write a mock -class, relax - gMock turns this task into a fun game! (Well, almost.) - -### How to Define It - -Using the `Turtle` interface as example, here are the simple steps you need to -follow: - -* Derive a class `MockTurtle` from `Turtle`. -* Take a *virtual* function of `Turtle` (while it's possible to - [mock non-virtual methods using templates](gmock_cook_book.md#MockingNonVirtualMethods), - it's much more involved). -* In the `public:` section of the child class, write `MOCK_METHOD();` -* Now comes the fun part: you take the function signature, cut-and-paste it - into the macro, and add two commas - one between the return type and the - name, another between the name and the argument list. -* If you're mocking a const method, add a 4th parameter containing `(const)` - (the parentheses are required). -* Since you're overriding a virtual method, we suggest adding the `override` - keyword. For const methods the 4th parameter becomes `(const, override)`, - for non-const methods just `(override)`. This isn't mandatory. -* Repeat until all virtual functions you want to mock are done. 
(It goes - without saying that *all* pure virtual methods in your abstract class must - be either mocked or overridden.) - -After the process, you should have something like: - -```cpp -#include "gmock/gmock.h" // Brings in gMock. - -class MockTurtle : public Turtle { - public: - ... - MOCK_METHOD(void, PenUp, (), (override)); - MOCK_METHOD(void, PenDown, (), (override)); - MOCK_METHOD(void, Forward, (int distance), (override)); - MOCK_METHOD(void, Turn, (int degrees), (override)); - MOCK_METHOD(void, GoTo, (int x, int y), (override)); - MOCK_METHOD(int, GetX, (), (const, override)); - MOCK_METHOD(int, GetY, (), (const, override)); -}; -``` - -You don't need to define these mock methods somewhere else - the `MOCK_METHOD` -macro will generate the definitions for you. It's that simple! - -### Where to Put It - -When you define a mock class, you need to decide where to put its definition. -Some people put it in a `_test.cc`. This is fine when the interface being mocked -(say, `Foo`) is owned by the same person or team. Otherwise, when the owner of -`Foo` changes it, your test could break. (You can't really expect `Foo`'s -maintainer to fix every test that uses `Foo`, can you?) - -Generally, you should not mock classes you don't own. If you must mock such a -class owned by others, define the mock class in `Foo`'s Bazel package (usually -the same directory or a `testing` sub-directory), and put it in a `.h` and a -`cc_library` with `testonly=True`. Then everyone can reference them from their -tests. If `Foo` ever changes, there is only one copy of `MockFoo` to change, and -only tests that depend on the changed methods need to be fixed. - -Another way to do it: you can introduce a thin layer `FooAdaptor` on top of -`Foo` and code to this new interface. Since you own `FooAdaptor`, you can absorb -changes in `Foo` much more easily. While this is more work initially, carefully -choosing the adaptor interface can make your code easier to write and more -readable (a net win in the long run), as you can choose `FooAdaptor` to fit your -specific domain much better than `Foo` does. - -## Using Mocks in Tests - -Once you have a mock class, using it is easy. The typical work flow is: - -1. Import the gMock names from the `testing` namespace such that you can use - them unqualified (You only have to do it once per file). Remember that - namespaces are a good idea. -2. Create some mock objects. -3. Specify your expectations on them (How many times will a method be called? - With what arguments? What should it do? etc.). -4. Exercise some code that uses the mocks; optionally, check the result using - googletest assertions. If a mock method is called more than expected or with - wrong arguments, you'll get an error immediately. -5. When a mock is destructed, gMock will automatically check whether all - expectations on it have been satisfied. - -Here's an example: - -```cpp -#include "path/to/mock-turtle.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -using ::testing::AtLeast; // #1 - -TEST(PainterTest, CanDrawSomething) { - MockTurtle turtle; // #2 - EXPECT_CALL(turtle, PenDown()) // #3 - .Times(AtLeast(1)); - - Painter painter(&turtle); // #4 - - EXPECT_TRUE(painter.DrawCircle(0, 0, 10)); // #5 -} -``` - -As you might have guessed, this test checks that `PenDown()` is called at least -once. 
If the `painter` object didn't call this method, your test will fail with -a message like this: - -```text -path/to/my_test.cc:119: Failure -Actual function call count doesn't match this expectation: -Actually: never called; -Expected: called at least once. -Stack trace: -... -``` - -**Tip 1:** If you run the test from an Emacs buffer, you can hit `` on -the line number to jump right to the failed expectation. - -**Tip 2:** If your mock objects are never deleted, the final verification won't -happen. Therefore it's a good idea to turn on the heap checker in your tests -when you allocate mocks on the heap. You get that automatically if you use the -`gtest_main` library already. - -**Important note:** gMock requires expectations to be set **before** the mock -functions are called, otherwise the behavior is **undefined**. Do not alternate -between calls to `EXPECT_CALL()` and calls to the mock functions, and do not set -any expectations on a mock after passing the mock to an API. - -This means `EXPECT_CALL()` should be read as expecting that a call will occur -*in the future*, not that a call has occurred. Why does gMock work like that? -Well, specifying the expectation beforehand allows gMock to report a violation -as soon as it rises, when the context (stack trace, etc) is still available. -This makes debugging much easier. - -Admittedly, this test is contrived and doesn't do much. You can easily achieve -the same effect without using gMock. However, as we shall reveal soon, gMock -allows you to do *so much more* with the mocks. - -## Setting Expectations - -The key to using a mock object successfully is to set the *right expectations* -on it. If you set the expectations too strict, your test will fail as the result -of unrelated changes. If you set them too loose, bugs can slip through. You want -to do it just right such that your test can catch exactly the kind of bugs you -intend it to catch. gMock provides the necessary means for you to do it "just -right." - -### General Syntax - -In gMock we use the `EXPECT_CALL()` macro to set an expectation on a mock -method. The general syntax is: - -```cpp -EXPECT_CALL(mock_object, method(matchers)) - .Times(cardinality) - .WillOnce(action) - .WillRepeatedly(action); -``` - -The macro has two arguments: first the mock object, and then the method and its -arguments. Note that the two are separated by a comma (`,`), not a period (`.`). -(Why using a comma? The answer is that it was necessary for technical reasons.) -If the method is not overloaded, the macro can also be called without matchers: - -```cpp -EXPECT_CALL(mock_object, non-overloaded-method) - .Times(cardinality) - .WillOnce(action) - .WillRepeatedly(action); -``` - -This syntax allows the test writer to specify "called with any arguments" -without explicitly specifying the number or types of arguments. To avoid -unintended ambiguity, this syntax may only be used for methods that are not -overloaded. - -Either form of the macro can be followed by some optional *clauses* that provide -more information about the expectation. We'll discuss how each clause works in -the coming sections. - -This syntax is designed to make an expectation read like English. For example, -you can probably guess that - -```cpp -using ::testing::Return; -... 
-EXPECT_CALL(turtle, GetX()) - .Times(5) - .WillOnce(Return(100)) - .WillOnce(Return(150)) - .WillRepeatedly(Return(200)); -``` - -says that the `turtle` object's `GetX()` method will be called five times, it -will return 100 the first time, 150 the second time, and then 200 every time. -Some people like to call this style of syntax a Domain-Specific Language (DSL). - -{: .callout .note} -**Note:** Why do we use a macro to do this? Well it serves two purposes: first -it makes expectations easily identifiable (either by `grep` or by a human -reader), and second it allows gMock to include the source file location of a -failed expectation in messages, making debugging easier. - -### Matchers: What Arguments Do We Expect? - -When a mock function takes arguments, we may specify what arguments we are -expecting, for example: - -```cpp -// Expects the turtle to move forward by 100 units. -EXPECT_CALL(turtle, Forward(100)); -``` - -Oftentimes you do not want to be too specific. Remember that talk about tests -being too rigid? Over specification leads to brittle tests and obscures the -intent of tests. Therefore we encourage you to specify only what's necessaryβ€”no -more, no less. If you aren't interested in the value of an argument, write `_` -as the argument, which means "anything goes": - -```cpp -using ::testing::_; -... -// Expects that the turtle jumps to somewhere on the x=50 line. -EXPECT_CALL(turtle, GoTo(50, _)); -``` - -`_` is an instance of what we call **matchers**. A matcher is like a predicate -and can test whether an argument is what we'd expect. You can use a matcher -inside `EXPECT_CALL()` wherever a function argument is expected. `_` is a -convenient way of saying "any value". - -In the above examples, `100` and `50` are also matchers; implicitly, they are -the same as `Eq(100)` and `Eq(50)`, which specify that the argument must be -equal (using `operator==`) to the matcher argument. There are many -[built-in matchers](reference/matchers.md) for common types (as well as -[custom matchers](gmock_cook_book.md#NewMatchers)); for example: - -```cpp -using ::testing::Ge; -... -// Expects the turtle moves forward by at least 100. -EXPECT_CALL(turtle, Forward(Ge(100))); -``` - -If you don't care about *any* arguments, rather than specify `_` for each of -them you may instead omit the parameter list: - -```cpp -// Expects the turtle to move forward. -EXPECT_CALL(turtle, Forward); -// Expects the turtle to jump somewhere. -EXPECT_CALL(turtle, GoTo); -``` - -This works for all non-overloaded methods; if a method is overloaded, you need -to help gMock resolve which overload is expected by specifying the number of -arguments and possibly also the -[types of the arguments](gmock_cook_book.md#SelectOverload). - -### Cardinalities: How Many Times Will It Be Called? - -The first clause we can specify following an `EXPECT_CALL()` is `Times()`. We -call its argument a **cardinality** as it tells *how many times* the call should -occur. It allows us to repeat an expectation many times without actually writing -it as many times. More importantly, a cardinality can be "fuzzy", just like a -matcher can be. This allows a user to express the intent of a test exactly. - -An interesting special case is when we say `Times(0)`. You may have guessed - it -means that the function shouldn't be called with the given arguments at all, and -gMock will report a googletest failure whenever the function is (wrongfully) -called. - -We've seen `AtLeast(n)` as an example of fuzzy cardinalities earlier. 
For the -list of built-in cardinalities you can use, see -[here](gmock_cheat_sheet.md#CardinalityList). - -The `Times()` clause can be omitted. **If you omit `Times()`, gMock will infer -the cardinality for you.** The rules are easy to remember: - -* If **neither** `WillOnce()` **nor** `WillRepeatedly()` is in the - `EXPECT_CALL()`, the inferred cardinality is `Times(1)`. -* If there are *n* `WillOnce()`'s but **no** `WillRepeatedly()`, where *n* >= - 1, the cardinality is `Times(n)`. -* If there are *n* `WillOnce()`'s and **one** `WillRepeatedly()`, where *n* >= - 0, the cardinality is `Times(AtLeast(n))`. - -**Quick quiz:** what do you think will happen if a function is expected to be -called twice but actually called four times? - -### Actions: What Should It Do? - -Remember that a mock object doesn't really have a working implementation? We as -users have to tell it what to do when a method is invoked. This is easy in -gMock. - -First, if the return type of a mock function is a built-in type or a pointer, -the function has a **default action** (a `void` function will just return, a -`bool` function will return `false`, and other functions will return 0). In -addition, in C++ 11 and above, a mock function whose return type is -default-constructible (i.e. has a default constructor) has a default action of -returning a default-constructed value. If you don't say anything, this behavior -will be used. - -Second, if a mock function doesn't have a default action, or the default action -doesn't suit you, you can specify the action to be taken each time the -expectation matches using a series of `WillOnce()` clauses followed by an -optional `WillRepeatedly()`. For example, - -```cpp -using ::testing::Return; -... -EXPECT_CALL(turtle, GetX()) - .WillOnce(Return(100)) - .WillOnce(Return(200)) - .WillOnce(Return(300)); -``` - -says that `turtle.GetX()` will be called *exactly three times* (gMock inferred -this from how many `WillOnce()` clauses we've written, since we didn't -explicitly write `Times()`), and will return 100, 200, and 300 respectively. - -```cpp -using ::testing::Return; -... -EXPECT_CALL(turtle, GetY()) - .WillOnce(Return(100)) - .WillOnce(Return(200)) - .WillRepeatedly(Return(300)); -``` - -says that `turtle.GetY()` will be called *at least twice* (gMock knows this as -we've written two `WillOnce()` clauses and a `WillRepeatedly()` while having no -explicit `Times()`), will return 100 and 200 respectively the first two times, -and 300 from the third time on. - -Of course, if you explicitly write a `Times()`, gMock will not try to infer the -cardinality itself. What if the number you specified is larger than there are -`WillOnce()` clauses? Well, after all `WillOnce()`s are used up, gMock will do -the *default* action for the function every time (unless, of course, you have a -`WillRepeatedly()`.). - -What can we do inside `WillOnce()` besides `Return()`? You can return a -reference using `ReturnRef(`*`variable`*`)`, or invoke a pre-defined function, -among [others](gmock_cook_book.md#using-actions). - -**Important note:** The `EXPECT_CALL()` statement evaluates the action clause -only once, even though the action may be performed many times. Therefore you -must be careful about side effects. The following may not do what you want: - -```cpp -using ::testing::Return; -... 
-int n = 100; -EXPECT_CALL(turtle, GetX()) - .Times(4) - .WillRepeatedly(Return(n++)); -``` - -Instead of returning 100, 101, 102, ..., consecutively, this mock function will -always return 100 as `n++` is only evaluated once. Similarly, `Return(new Foo)` -will create a new `Foo` object when the `EXPECT_CALL()` is executed, and will -return the same pointer every time. If you want the side effect to happen every -time, you need to define a custom action, which we'll teach in the -[cook book](gmock_cook_book.md). - -Time for another quiz! What do you think the following means? - -```cpp -using ::testing::Return; -... -EXPECT_CALL(turtle, GetY()) - .Times(4) - .WillOnce(Return(100)); -``` - -Obviously `turtle.GetY()` is expected to be called four times. But if you think -it will return 100 every time, think twice! Remember that one `WillOnce()` -clause will be consumed each time the function is invoked and the default action -will be taken afterwards. So the right answer is that `turtle.GetY()` will -return 100 the first time, but **return 0 from the second time on**, as -returning 0 is the default action for `int` functions. - -### Using Multiple Expectations {#MultiExpectations} - -So far we've only shown examples where you have a single expectation. More -realistically, you'll specify expectations on multiple mock methods which may be -from multiple mock objects. - -By default, when a mock method is invoked, gMock will search the expectations in -the **reverse order** they are defined, and stop when an active expectation that -matches the arguments is found (you can think of it as "newer rules override -older ones."). If the matching expectation cannot take any more calls, you will -get an upper-bound-violated failure. Here's an example: - -```cpp -using ::testing::_; -... -EXPECT_CALL(turtle, Forward(_)); // #1 -EXPECT_CALL(turtle, Forward(10)) // #2 - .Times(2); -``` - -If `Forward(10)` is called three times in a row, the third time it will be an -error, as the last matching expectation (#2) has been saturated. If, however, -the third `Forward(10)` call is replaced by `Forward(20)`, then it would be OK, -as now #1 will be the matching expectation. - -{: .callout .note} -**Note:** Why does gMock search for a match in the *reverse* order of the -expectations? The reason is that this allows a user to set up the default -expectations in a mock object's constructor or the test fixture's set-up phase -and then customize the mock by writing more specific expectations in the test -body. So, if you have two expectations on the same method, you want to put the -one with more specific matchers **after** the other, or the more specific rule -would be shadowed by the more general one that comes after it. - -{: .callout .tip} -**Tip:** It is very common to start with a catch-all expectation for a method -and `Times(AnyNumber())` (omitting arguments, or with `_` for all arguments, if -overloaded). This makes any calls to the method expected. This is not necessary -for methods that are not mentioned at all (these are "uninteresting"), but is -useful for methods that have some expectations, but for which other calls are -ok. See -[Understanding Uninteresting vs Unexpected Calls](gmock_cook_book.md#uninteresting-vs-unexpected). - -### Ordered vs Unordered Calls {#OrderedCalls} - -By default, an expectation can match a call even though an earlier expectation -hasn't been satisfied. In other words, the calls don't have to occur in the -order the expectations are specified. 
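
For instance, here is a small sketch (reusing the `MockTurtle` from earlier, and calling the mock directly just to keep the example short) showing that expectations set in one order can be satisfied in another:

```cpp
  MockTurtle turtle;
  EXPECT_CALL(turtle, PenDown());
  EXPECT_CALL(turtle, Forward(100));

  // The two expectations above are unordered, so satisfying them in the
  // opposite order is perfectly fine.
  turtle.Forward(100);
  turtle.PenDown();
```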
- -Sometimes, you may want all the expected calls to occur in a strict order. To -say this in gMock is easy: - -```cpp -using ::testing::InSequence; -... -TEST(FooTest, DrawsLineSegment) { - ... - { - InSequence seq; - - EXPECT_CALL(turtle, PenDown()); - EXPECT_CALL(turtle, Forward(100)); - EXPECT_CALL(turtle, PenUp()); - } - Foo(); -} -``` - -By creating an object of type `InSequence`, all expectations in its scope are -put into a *sequence* and have to occur *sequentially*. Since we are just -relying on the constructor and destructor of this object to do the actual work, -its name is really irrelevant. - -In this example, we test that `Foo()` calls the three expected functions in the -order as written. If a call is made out-of-order, it will be an error. - -(What if you care about the relative order of some of the calls, but not all of -them? Can you specify an arbitrary partial order? The answer is ... yes! The -details can be found [here](gmock_cook_book.md#OrderedCalls).) - -### All Expectations Are Sticky (Unless Said Otherwise) {#StickyExpectations} - -Now let's do a quick quiz to see how well you can use this mock stuff already. -How would you test that the turtle is asked to go to the origin *exactly twice* -(you want to ignore any other instructions it receives)? - -After you've come up with your answer, take a look at ours and compare notes -(solve it yourself first - don't cheat!): - -```cpp -using ::testing::_; -using ::testing::AnyNumber; -... -EXPECT_CALL(turtle, GoTo(_, _)) // #1 - .Times(AnyNumber()); -EXPECT_CALL(turtle, GoTo(0, 0)) // #2 - .Times(2); -``` - -Suppose `turtle.GoTo(0, 0)` is called three times. In the third time, gMock will -see that the arguments match expectation #2 (remember that we always pick the -last matching expectation). Now, since we said that there should be only two -such calls, gMock will report an error immediately. This is basically what we've -told you in the [Using Multiple Expectations](#MultiExpectations) section above. - -This example shows that **expectations in gMock are "sticky" by default**, in -the sense that they remain active even after we have reached their invocation -upper bounds. This is an important rule to remember, as it affects the meaning -of the spec, and is **different** to how it's done in many other mocking -frameworks (Why'd we do that? Because we think our rule makes the common cases -easier to express and understand.). - -Simple? Let's see if you've really understood it: what does the following code -say? - -```cpp -using ::testing::Return; -... -for (int i = n; i > 0; i--) { - EXPECT_CALL(turtle, GetX()) - .WillOnce(Return(10*i)); -} -``` - -If you think it says that `turtle.GetX()` will be called `n` times and will -return 10, 20, 30, ..., consecutively, think twice! The problem is that, as we -said, expectations are sticky. So, the second time `turtle.GetX()` is called, -the last (latest) `EXPECT_CALL()` statement will match, and will immediately -lead to an "upper bound violated" error - this piece of code is not very useful! - -One correct way of saying that `turtle.GetX()` will return 10, 20, 30, ..., is -to explicitly say that the expectations are *not* sticky. In other words, they -should *retire* as soon as they are saturated: - -```cpp -using ::testing::Return; -... 
-for (int i = n; i > 0; i--) { - EXPECT_CALL(turtle, GetX()) - .WillOnce(Return(10*i)) - .RetiresOnSaturation(); -} -``` - -And, there's a better way to do it: in this case, we expect the calls to occur -in a specific order, and we line up the actions to match the order. Since the -order is important here, we should make it explicit using a sequence: - -```cpp -using ::testing::InSequence; -using ::testing::Return; -... -{ - InSequence s; - - for (int i = 1; i <= n; i++) { - EXPECT_CALL(turtle, GetX()) - .WillOnce(Return(10*i)) - .RetiresOnSaturation(); - } -} -``` - -By the way, the other situation where an expectation may *not* be sticky is when -it's in a sequence - as soon as another expectation that comes after it in the -sequence has been used, it automatically retires (and will never be used to -match any call). - -### Uninteresting Calls - -A mock object may have many methods, and not all of them are that interesting. -For example, in some tests we may not care about how many times `GetX()` and -`GetY()` get called. - -In gMock, if you are not interested in a method, just don't say anything about -it. If a call to this method occurs, you'll see a warning in the test output, -but it won't be a failure. This is called "naggy" behavior; to change, see -[The Nice, the Strict, and the Naggy](gmock_cook_book.md#NiceStrictNaggy). diff --git a/3rdparty/googletest-1.13.0/docs/index.md b/3rdparty/googletest-1.13.0/docs/index.md deleted file mode 100644 index b162c740116394bd6871fe9e65f78cd0289b258f..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/index.md +++ /dev/null @@ -1,22 +0,0 @@ -# GoogleTest User's Guide - -## Welcome to GoogleTest! - -GoogleTest is Google's C++ testing and mocking framework. This user's guide has -the following contents: - -* [GoogleTest Primer](primer.md) - Teaches you how to write simple tests using - GoogleTest. Read this first if you are new to GoogleTest. -* [GoogleTest Advanced](advanced.md) - Read this when you've finished the - Primer and want to utilize GoogleTest to its full potential. -* [GoogleTest Samples](samples.md) - Describes some GoogleTest samples. -* [GoogleTest FAQ](faq.md) - Have a question? Want some tips? Check here - first. -* [Mocking for Dummies](gmock_for_dummies.md) - Teaches you how to create mock - objects and use them in tests. -* [Mocking Cookbook](gmock_cook_book.md) - Includes tips and approaches to - common mocking use cases. -* [Mocking Cheat Sheet](gmock_cheat_sheet.md) - A handy reference for - matchers, actions, invariants, and more. -* [Mocking FAQ](gmock_faq.md) - Contains answers to some mocking-specific - questions. diff --git a/3rdparty/googletest-1.13.0/docs/pkgconfig.md b/3rdparty/googletest-1.13.0/docs/pkgconfig.md deleted file mode 100644 index 18a2546a3846acde26b930a5ee30a00cce96a570..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/pkgconfig.md +++ /dev/null @@ -1,148 +0,0 @@ -## Using GoogleTest from various build systems - -GoogleTest comes with pkg-config files that can be used to determine all -necessary flags for compiling and linking to GoogleTest (and GoogleMock). -Pkg-config is a standardised plain-text format containing - -* the includedir (-I) path -* necessary macro (-D) definitions -* further required flags (-pthread) -* the library (-L) path -* the library (-l) to link to - -All current build systems support pkg-config in one way or another. For all -examples here we assume you want to compile the sample -`samples/sample3_unittest.cc`. 
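-
-For a quick smoke test outside of any build system, the pkg-config output can
-also be passed straight to the compiler. This is only a sketch: it assumes the
-`gtest_main.pc` file is already visible to pkg-config on your machine, and
-`my_test.cc` is a placeholder for a test file of your own:
-
-```
-c++ $(pkg-config --cflags gtest_main) my_test.cc \
-    $(pkg-config --libs gtest_main) -o my_test
-```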
- -### CMake - -Using `pkg-config` in CMake is fairly easy: - -```cmake -cmake_minimum_required(VERSION 3.0) - -cmake_policy(SET CMP0048 NEW) -project(my_gtest_pkgconfig VERSION 0.0.1 LANGUAGES CXX) - -find_package(PkgConfig) -pkg_search_module(GTEST REQUIRED gtest_main) - -add_executable(testapp samples/sample3_unittest.cc) -target_link_libraries(testapp ${GTEST_LDFLAGS}) -target_compile_options(testapp PUBLIC ${GTEST_CFLAGS}) - -include(CTest) -add_test(first_and_only_test testapp) -``` - -It is generally recommended that you use `target_compile_options` + `_CFLAGS` -over `target_include_directories` + `_INCLUDE_DIRS` as the former includes not -just -I flags (GoogleTest might require a macro indicating to internal headers -that all libraries have been compiled with threading enabled. In addition, -GoogleTest might also require `-pthread` in the compiling step, and as such -splitting the pkg-config `Cflags` variable into include dirs and macros for -`target_compile_definitions()` might still miss this). The same recommendation -goes for using `_LDFLAGS` over the more commonplace `_LIBRARIES`, which happens -to discard `-L` flags and `-pthread`. - -### Help! pkg-config can't find GoogleTest! - -Let's say you have a `CMakeLists.txt` along the lines of the one in this -tutorial and you try to run `cmake`. It is very possible that you get a failure -along the lines of: - -``` --- Checking for one of the modules 'gtest_main' -CMake Error at /usr/share/cmake/Modules/FindPkgConfig.cmake:640 (message): - None of the required 'gtest_main' found -``` - -These failures are common if you installed GoogleTest yourself and have not -sourced it from a distro or other package manager. If so, you need to tell -pkg-config where it can find the `.pc` files containing the information. Say you -installed GoogleTest to `/usr/local`, then it might be that the `.pc` files are -installed under `/usr/local/lib64/pkgconfig`. If you set - -``` -export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig -``` - -pkg-config will also try to look in `PKG_CONFIG_PATH` to find `gtest_main.pc`. - -### Using pkg-config in a cross-compilation setting - -Pkg-config can be used in a cross-compilation setting too. To do this, let's -assume the final prefix of the cross-compiled installation will be `/usr`, and -your sysroot is `/home/MYUSER/sysroot`. Configure and install GTest using - -``` -mkdir build && cmake -DCMAKE_INSTALL_PREFIX=/usr .. -``` - -Install into the sysroot using `DESTDIR`: - -``` -make -j install DESTDIR=/home/MYUSER/sysroot -``` - -Before we continue, it is recommended to **always** define the following two -variables for pkg-config in a cross-compilation setting: - -``` -export PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=yes -export PKG_CONFIG_ALLOW_SYSTEM_LIBS=yes -``` - -otherwise `pkg-config` will filter `-I` and `-L` flags against standard prefixes -such as `/usr` (see https://bugs.freedesktop.org/show_bug.cgi?id=28264#c3 for -reasons why this stripping needs to occur usually). - -If you look at the generated pkg-config file, it will look something like - -``` -libdir=/usr/lib64 -includedir=/usr/include - -Name: gtest -Description: GoogleTest (without main() function) -Version: 1.11.0 -URL: https://github.com/google/googletest -Libs: -L${libdir} -lgtest -lpthread -Cflags: -I${includedir} -DGTEST_HAS_PTHREAD=1 -lpthread -``` - -Notice that the sysroot is not included in `libdir` and `includedir`! 
If you try -to run `pkg-config` with the correct -`PKG_CONFIG_LIBDIR=/home/MYUSER/sysroot/usr/lib64/pkgconfig` against this `.pc` -file, you will get - -``` -$ pkg-config --cflags gtest --DGTEST_HAS_PTHREAD=1 -lpthread -I/usr/include -$ pkg-config --libs gtest --L/usr/lib64 -lgtest -lpthread -``` - -which is obviously wrong and points to the `CBUILD` and not `CHOST` root. In -order to use this in a cross-compilation setting, we need to tell pkg-config to -inject the actual sysroot into `-I` and `-L` variables. Let us now tell -pkg-config about the actual sysroot - -``` -export PKG_CONFIG_DIR= -export PKG_CONFIG_SYSROOT_DIR=/home/MYUSER/sysroot -export PKG_CONFIG_LIBDIR=${PKG_CONFIG_SYSROOT_DIR}/usr/lib64/pkgconfig -``` - -and running `pkg-config` again we get - -``` -$ pkg-config --cflags gtest --DGTEST_HAS_PTHREAD=1 -lpthread -I/home/MYUSER/sysroot/usr/include -$ pkg-config --libs gtest --L/home/MYUSER/sysroot/usr/lib64 -lgtest -lpthread -``` - -which contains the correct sysroot now. For a more comprehensive guide to also -including `${CHOST}` in build system calls, see the excellent tutorial by Diego -Elio PettenΓ²: diff --git a/3rdparty/googletest-1.13.0/docs/platforms.md b/3rdparty/googletest-1.13.0/docs/platforms.md deleted file mode 100644 index eba6ef805661f33dff7588039396678a19a108a9..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/platforms.md +++ /dev/null @@ -1,35 +0,0 @@ -# Supported Platforms - -GoogleTest requires a codebase and compiler compliant with the C++11 standard or -newer. - -The GoogleTest code is officially supported on the following platforms. -Operating systems or tools not listed below are community-supported. For -community-supported platforms, patches that do not complicate the code may be -considered. - -If you notice any problems on your platform, please file an issue on the -[GoogleTest GitHub Issue Tracker](https://github.com/google/googletest/issues). -Pull requests containing fixes are welcome! - -### Operating systems - -* Linux -* macOS -* Windows - -### Compilers - -* gcc 5.0+ -* clang 5.0+ -* MSVC 2015+ - -**macOS users:** Xcode 9.3+ provides clang 5.0+. - -### Build systems - -* [Bazel](https://bazel.build/) -* [CMake](https://cmake.org/) - -Bazel is the build system used by the team internally and in tests. CMake is -supported on a best-effort basis and by the community. diff --git a/3rdparty/googletest-1.13.0/docs/primer.md b/3rdparty/googletest-1.13.0/docs/primer.md deleted file mode 100644 index 2ffbf53bc8dff6337e8f4c4d33d6f7b4df767bbe..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/primer.md +++ /dev/null @@ -1,483 +0,0 @@ -# Googletest Primer - -## Introduction: Why googletest? - -*googletest* helps you write better C++ tests. - -googletest is a testing framework developed by the Testing Technology team with -Google's specific requirements and constraints in mind. Whether you work on -Linux, Windows, or a Mac, if you write C++ code, googletest can help you. And it -supports *any* kind of tests, not just unit tests. - -So what makes a good test, and how does googletest fit in? We believe: - -1. Tests should be *independent* and *repeatable*. It's a pain to debug a test - that succeeds or fails as a result of other tests. googletest isolates the - tests by running each of them on a different object. When a test fails, - googletest allows you to run it in isolation for quick debugging. -2. Tests should be well *organized* and reflect the structure of the tested - code. 
googletest groups related tests into test suites that can share data - and subroutines. This common pattern is easy to recognize and makes tests - easy to maintain. Such consistency is especially helpful when people switch - projects and start to work on a new code base. -3. Tests should be *portable* and *reusable*. Google has a lot of code that is - platform-neutral; its tests should also be platform-neutral. googletest - works on different OSes, with different compilers, with or without - exceptions, so googletest tests can work with a variety of configurations. -4. When tests fail, they should provide as much *information* about the problem - as possible. googletest doesn't stop at the first test failure. Instead, it - only stops the current test and continues with the next. You can also set up - tests that report non-fatal failures after which the current test continues. - Thus, you can detect and fix multiple bugs in a single run-edit-compile - cycle. -5. The testing framework should liberate test writers from housekeeping chores - and let them focus on the test *content*. googletest automatically keeps - track of all tests defined, and doesn't require the user to enumerate them - in order to run them. -6. Tests should be *fast*. With googletest, you can reuse shared resources - across tests and pay for the set-up/tear-down only once, without making - tests depend on each other. - -Since googletest is based on the popular xUnit architecture, you'll feel right -at home if you've used JUnit or PyUnit before. If not, it will take you about 10 -minutes to learn the basics and get started. So let's go! - -## Beware of the nomenclature - -{: .callout .note} -_Note:_ There might be some confusion arising from different definitions of the -terms _Test_, _Test Case_ and _Test Suite_, so beware of misunderstanding these. - -Historically, googletest started to use the term _Test Case_ for grouping -related tests, whereas current publications, including International Software -Testing Qualifications Board ([ISTQB](http://www.istqb.org/)) materials and -various textbooks on software quality, use the term -_[Test Suite][istqb test suite]_ for this. - -The related term _Test_, as it is used in googletest, corresponds to the term -_[Test Case][istqb test case]_ of ISTQB and others. - -The term _Test_ is commonly of broad enough sense, including ISTQB's definition -of _Test Case_, so it's not much of a problem here. But the term _Test Case_ as -was used in Google Test is of contradictory sense and thus confusing. - -googletest recently started replacing the term _Test Case_ with _Test Suite_. -The preferred API is *TestSuite*. The older TestCase API is being slowly -deprecated and refactored away. - -So please be aware of the different definitions of the terms: - - -Meaning | googletest Term | [ISTQB](http://www.istqb.org/) Term -:----------------------------------------------------------------------------------- | :---------------------- | :---------------------------------- -Exercise a particular program path with specific input values and verify the results | [TEST()](#simple-tests) | [Test Case][istqb test case] - - -[istqb test case]: http://glossary.istqb.org/en/search/test%20case -[istqb test suite]: http://glossary.istqb.org/en/search/test%20suite - -## Basic Concepts - -When using googletest, you start by writing *assertions*, which are statements -that check whether a condition is true. An assertion's result can be *success*, -*nonfatal failure*, or *fatal failure*. 
If a fatal failure occurs, it aborts the -current function; otherwise the program continues normally. - -*Tests* use assertions to verify the tested code's behavior. If a test crashes -or has a failed assertion, then it *fails*; otherwise it *succeeds*. - -A *test suite* contains one or many tests. You should group your tests into test -suites that reflect the structure of the tested code. When multiple tests in a -test suite need to share common objects and subroutines, you can put them into a -*test fixture* class. - -A *test program* can contain multiple test suites. - -We'll now explain how to write a test program, starting at the individual -assertion level and building up to tests and test suites. - -## Assertions - -googletest assertions are macros that resemble function calls. You test a class -or function by making assertions about its behavior. When an assertion fails, -googletest prints the assertion's source file and line number location, along -with a failure message. You may also supply a custom failure message which will -be appended to googletest's message. - -The assertions come in pairs that test the same thing but have different effects -on the current function. `ASSERT_*` versions generate fatal failures when they -fail, and **abort the current function**. `EXPECT_*` versions generate nonfatal -failures, which don't abort the current function. Usually `EXPECT_*` are -preferred, as they allow more than one failure to be reported in a test. -However, you should use `ASSERT_*` if it doesn't make sense to continue when the -assertion in question fails. - -Since a failed `ASSERT_*` returns from the current function immediately, -possibly skipping clean-up code that comes after it, it may cause a space leak. -Depending on the nature of the leak, it may or may not be worth fixing - so keep -this in mind if you get a heap checker error in addition to assertion errors. - -To provide a custom failure message, simply stream it into the macro using the -`<<` operator or a sequence of such operators. See the following example, using -the [`ASSERT_EQ` and `EXPECT_EQ`](reference/assertions.md#EXPECT_EQ) macros to -verify value equality: - -```c++ -ASSERT_EQ(x.size(), y.size()) << "Vectors x and y are of unequal length"; - -for (int i = 0; i < x.size(); ++i) { - EXPECT_EQ(x[i], y[i]) << "Vectors x and y differ at index " << i; -} -``` - -Anything that can be streamed to an `ostream` can be streamed to an assertion -macro--in particular, C strings and `string` objects. If a wide string -(`wchar_t*`, `TCHAR*` in `UNICODE` mode on Windows, or `std::wstring`) is -streamed to an assertion, it will be translated to UTF-8 when printed. - -GoogleTest provides a collection of assertions for verifying the behavior of -your code in various ways. You can check Boolean conditions, compare values -based on relational operators, verify string values, floating-point values, and -much more. There are even assertions that enable you to verify more complex -states by providing custom predicates. For the complete list of assertions -provided by GoogleTest, see the [Assertions Reference](reference/assertions.md). - -## Simple Tests - -To create a test: - -1. Use the `TEST()` macro to define and name a test function. These are - ordinary C++ functions that don't return a value. -2. In this function, along with any valid C++ statements you want to include, - use the various googletest assertions to check values. -3. 
The test's result is determined by the assertions; if any assertion in the - test fails (either fatally or non-fatally), or if the test crashes, the - entire test fails. Otherwise, it succeeds. - -```c++ -TEST(TestSuiteName, TestName) { - ... test body ... -} -``` - -`TEST()` arguments go from general to specific. The *first* argument is the name -of the test suite, and the *second* argument is the test's name within the test -suite. Both names must be valid C++ identifiers, and they should not contain any -underscores (`_`). A test's *full name* consists of its containing test suite -and its individual name. Tests from different test suites can have the same -individual name. - -For example, let's take a simple integer function: - -```c++ -int Factorial(int n); // Returns the factorial of n -``` - -A test suite for this function might look like: - -```c++ -// Tests factorial of 0. -TEST(FactorialTest, HandlesZeroInput) { - EXPECT_EQ(Factorial(0), 1); -} - -// Tests factorial of positive numbers. -TEST(FactorialTest, HandlesPositiveInput) { - EXPECT_EQ(Factorial(1), 1); - EXPECT_EQ(Factorial(2), 2); - EXPECT_EQ(Factorial(3), 6); - EXPECT_EQ(Factorial(8), 40320); -} -``` - -googletest groups the test results by test suites, so logically related tests -should be in the same test suite; in other words, the first argument to their -`TEST()` should be the same. In the above example, we have two tests, -`HandlesZeroInput` and `HandlesPositiveInput`, that belong to the same test -suite `FactorialTest`. - -When naming your test suites and tests, you should follow the same convention as -for -[naming functions and classes](https://google.github.io/styleguide/cppguide.html#Function_Names). - -**Availability**: Linux, Windows, Mac. - -## Test Fixtures: Using the Same Data Configuration for Multiple Tests {#same-data-multiple-tests} - -If you find yourself writing two or more tests that operate on similar data, you -can use a *test fixture*. This allows you to reuse the same configuration of -objects for several different tests. - -To create a fixture: - -1. Derive a class from `::testing::Test` . Start its body with `protected:`, as - we'll want to access fixture members from sub-classes. -2. Inside the class, declare any objects you plan to use. -3. If necessary, write a default constructor or `SetUp()` function to prepare - the objects for each test. A common mistake is to spell `SetUp()` as - **`Setup()`** with a small `u` - Use `override` in C++11 to make sure you - spelled it correctly. -4. If necessary, write a destructor or `TearDown()` function to release any - resources you allocated in `SetUp()` . To learn when you should use the - constructor/destructor and when you should use `SetUp()/TearDown()`, read - the [FAQ](faq.md#CtorVsSetUp). -5. If needed, define subroutines for your tests to share. - -When using a fixture, use `TEST_F()` instead of `TEST()` as it allows you to -access objects and subroutines in the test fixture: - -```c++ -TEST_F(TestFixtureName, TestName) { - ... test body ... -} -``` - -Like `TEST()`, the first argument is the test suite name, but for `TEST_F()` -this must be the name of the test fixture class. You've probably guessed: `_F` -is for fixture. - -Unfortunately, the C++ macro system does not allow us to create a single macro -that can handle both types of tests. Using the wrong macro causes a compiler -error. 
-
-Also, you must first define a test fixture class before using it in a
-`TEST_F()`, or you'll get the compiler error "`virtual outside class
-declaration`".
-
-For each test defined with `TEST_F()`, googletest will create a *fresh* test
-fixture at runtime, immediately initialize it via `SetUp()`, run the test, clean
-up by calling `TearDown()`, and then delete the test fixture. Note that
-different tests in the same test suite have different test fixture objects, and
-googletest always deletes a test fixture before it creates the next one.
-googletest does **not** reuse the same test fixture for multiple tests. Any
-changes one test makes to the fixture do not affect other tests.
-
-As an example, let's write tests for a FIFO queue class named `Queue`, which has
-the following interface:
-
-```c++
-template <typename E>  // E is the element type.
-class Queue {
- public:
-  Queue();
-  void Enqueue(const E& element);
-  E* Dequeue();  // Returns NULL if the queue is empty.
-  size_t size() const;
-  ...
-};
-```
-
-First, define a fixture class. By convention, you should give it the name
-`FooTest` where `Foo` is the class being tested.
-
-```c++
-class QueueTest : public ::testing::Test {
- protected:
-  void SetUp() override {
-     // q0_ remains empty
-     q1_.Enqueue(1);
-     q2_.Enqueue(2);
-     q2_.Enqueue(3);
-  }
-
-  // void TearDown() override {}
-
-  Queue<int> q0_;
-  Queue<int> q1_;
-  Queue<int> q2_;
-};
-```
-
-In this case, `TearDown()` is not needed since we don't have to clean up after
-each test, other than what's already done by the destructor.
-
-Now we'll write tests using `TEST_F()` and this fixture.
-
-```c++
-TEST_F(QueueTest, IsEmptyInitially) {
-  EXPECT_EQ(q0_.size(), 0);
-}
-
-TEST_F(QueueTest, DequeueWorks) {
-  int* n = q0_.Dequeue();
-  EXPECT_EQ(n, nullptr);
-
-  n = q1_.Dequeue();
-  ASSERT_NE(n, nullptr);
-  EXPECT_EQ(*n, 1);
-  EXPECT_EQ(q1_.size(), 0);
-  delete n;
-
-  n = q2_.Dequeue();
-  ASSERT_NE(n, nullptr);
-  EXPECT_EQ(*n, 2);
-  EXPECT_EQ(q2_.size(), 1);
-  delete n;
-}
-```
-
-The above uses both `ASSERT_*` and `EXPECT_*` assertions. The rule of thumb is
-to use `EXPECT_*` when you want the test to continue to reveal more errors after
-the assertion failure, and use `ASSERT_*` when continuing after failure doesn't
-make sense. For example, the second assertion in the `Dequeue` test is
-`ASSERT_NE(n, nullptr)`, as we need to dereference the pointer `n` later, which
-would lead to a segfault when `n` is `NULL`.
-
-When these tests run, the following happens:
-
-1. googletest constructs a `QueueTest` object (let's call it `t1`).
-2. `t1.SetUp()` initializes `t1`.
-3. The first test (`IsEmptyInitially`) runs on `t1`.
-4. `t1.TearDown()` cleans up after the test finishes.
-5. `t1` is destructed.
-6. The above steps are repeated on another `QueueTest` object, this time
-   running the `DequeueWorks` test.
-
-**Availability**: Linux, Windows, Mac.
-
-## Invoking the Tests
-
-`TEST()` and `TEST_F()` implicitly register their tests with googletest. So,
-unlike with many other C++ testing frameworks, you don't have to re-list all
-your defined tests in order to run them.
-
-After defining your tests, you can run them with `RUN_ALL_TESTS()`, which
-returns `0` if all the tests are successful, or `1` otherwise. Note that
-`RUN_ALL_TESTS()` runs *all tests* in your link unit--they can be from different
-test suites, or even different source files.
-
-When invoked, the `RUN_ALL_TESTS()` macro:
-
-* Saves the state of all googletest flags.
-
-* Creates a test fixture object for the first test.
- -* Initializes it via `SetUp()`. - -* Runs the test on the fixture object. - -* Cleans up the fixture via `TearDown()`. - -* Deletes the fixture. - -* Restores the state of all googletest flags. - -* Repeats the above steps for the next test, until all tests have run. - -If a fatal failure happens the subsequent steps will be skipped. - -{: .callout .important} -> IMPORTANT: You must **not** ignore the return value of `RUN_ALL_TESTS()`, or -> you will get a compiler error. The rationale for this design is that the -> automated testing service determines whether a test has passed based on its -> exit code, not on its stdout/stderr output; thus your `main()` function must -> return the value of `RUN_ALL_TESTS()`. -> -> Also, you should call `RUN_ALL_TESTS()` only **once**. Calling it more than -> once conflicts with some advanced googletest features (e.g., thread-safe -> [death tests](advanced.md#death-tests)) and thus is not supported. - -**Availability**: Linux, Windows, Mac. - -## Writing the main() Function - -Most users should _not_ need to write their own `main` function and instead link -with `gtest_main` (as opposed to with `gtest`), which defines a suitable entry -point. See the end of this section for details. The remainder of this section -should only apply when you need to do something custom before the tests run that -cannot be expressed within the framework of fixtures and test suites. - -If you write your own `main` function, it should return the value of -`RUN_ALL_TESTS()`. - -You can start from this boilerplate: - -```c++ -#include "this/package/foo.h" - -#include "gtest/gtest.h" - -namespace my { -namespace project { -namespace { - -// The fixture for testing class Foo. -class FooTest : public ::testing::Test { - protected: - // You can remove any or all of the following functions if their bodies would - // be empty. - - FooTest() { - // You can do set-up work for each test here. - } - - ~FooTest() override { - // You can do clean-up work that doesn't throw exceptions here. - } - - // If the constructor and destructor are not enough for setting up - // and cleaning up each test, you can define the following methods: - - void SetUp() override { - // Code here will be called immediately after the constructor (right - // before each test). - } - - void TearDown() override { - // Code here will be called immediately after each test (right - // before the destructor). - } - - // Class members declared here can be used by all tests in the test suite - // for Foo. -}; - -// Tests that the Foo::Bar() method does Abc. -TEST_F(FooTest, MethodBarDoesAbc) { - const std::string input_filepath = "this/package/testdata/myinputfile.dat"; - const std::string output_filepath = "this/package/testdata/myoutputfile.dat"; - Foo f; - EXPECT_EQ(f.Bar(input_filepath, output_filepath), 0); -} - -// Tests that Foo does Xyz. -TEST_F(FooTest, DoesXyz) { - // Exercises the Xyz feature of Foo. -} - -} // namespace -} // namespace project -} // namespace my - -int main(int argc, char **argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} -``` - -The `::testing::InitGoogleTest()` function parses the command line for -googletest flags, and removes all recognized flags. This allows the user to -control a test program's behavior via various flags, which we'll cover in the -[AdvancedGuide](advanced.md). You **must** call this function before calling -`RUN_ALL_TESTS()`, or the flags won't be properly initialized. 
- -On Windows, `InitGoogleTest()` also works with wide strings, so it can be used -in programs compiled in `UNICODE` mode as well. - -But maybe you think that writing all those `main` functions is too much work? We -agree with you completely, and that's why Google Test provides a basic -implementation of main(). If it fits your needs, then just link your test with -the `gtest_main` library and you are good to go. - -{: .callout .note} -NOTE: `ParseGUnitFlags()` is deprecated in favor of `InitGoogleTest()`. - -## Known Limitations - -* Google Test is designed to be thread-safe. The implementation is thread-safe - on systems where the `pthreads` library is available. It is currently - _unsafe_ to use Google Test assertions from two threads concurrently on - other systems (e.g. Windows). In most tests this is not an issue as usually - the assertions are done in the main thread. If you want to help, you can - volunteer to implement the necessary synchronization primitives in - `gtest-port.h` for your platform. diff --git a/3rdparty/googletest-1.13.0/docs/quickstart-bazel.md b/3rdparty/googletest-1.13.0/docs/quickstart-bazel.md deleted file mode 100644 index 15c27a22ed9c63eeb234e35db8f02bb63ba8c9b8..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/quickstart-bazel.md +++ /dev/null @@ -1,146 +0,0 @@ -# Quickstart: Building with Bazel - -This tutorial aims to get you up and running with GoogleTest using the Bazel -build system. If you're using GoogleTest for the first time or need a refresher, -we recommend this tutorial as a starting point. - -## Prerequisites - -To complete this tutorial, you'll need: - -* A compatible operating system (e.g. Linux, macOS, Windows). -* A compatible C++ compiler that supports at least C++14. -* [Bazel](https://bazel.build/), the preferred build system used by the - GoogleTest team. - -See [Supported Platforms](platforms.md) for more information about platforms -compatible with GoogleTest. - -If you don't already have Bazel installed, see the -[Bazel installation guide](https://bazel.build/install). - -{: .callout .note} Note: The terminal commands in this tutorial show a Unix -shell prompt, but the commands work on the Windows command line as well. - -## Set up a Bazel workspace - -A -[Bazel workspace](https://docs.bazel.build/versions/main/build-ref.html#workspace) -is a directory on your filesystem that you use to manage source files for the -software you want to build. Each workspace directory has a text file named -`WORKSPACE` which may be empty, or may contain references to external -dependencies required to build the outputs. - -First, create a directory for your workspace: - -``` -$ mkdir my_workspace && cd my_workspace -``` - -Next, you’ll create the `WORKSPACE` file to specify dependencies. A common and -recommended way to depend on GoogleTest is to use a -[Bazel external dependency](https://docs.bazel.build/versions/main/external.html) -via the -[`http_archive` rule](https://docs.bazel.build/versions/main/repo/http.html#http_archive). 
-To do this, in the root directory of your workspace (`my_workspace/`), create a
-file named `WORKSPACE` with the following contents:
-
-```
-load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
-
-http_archive(
-  name = "com_google_googletest",
-  urls = ["https://github.com/google/googletest/archive/5ab508a01f9eb089207ee87fd547d290da39d015.zip"],
-  strip_prefix = "googletest-5ab508a01f9eb089207ee87fd547d290da39d015",
-)
-```
-
-The above configuration declares a dependency on GoogleTest which is downloaded
-as a ZIP archive from GitHub. In the above example,
-`5ab508a01f9eb089207ee87fd547d290da39d015` is the Git commit hash of the
-GoogleTest version to use; we recommend updating the hash often to point to the
-latest version. Use a recent hash on the `main` branch.
-
-Now you're ready to build C++ code that uses GoogleTest.
-
-## Create and run a binary
-
-With your Bazel workspace set up, you can now use GoogleTest code within your
-own project.
-
-As an example, create a file named `hello_test.cc` in your `my_workspace`
-directory with the following contents:
-
-```cpp
-#include <gtest/gtest.h>
-
-// Demonstrate some basic assertions.
-TEST(HelloTest, BasicAssertions) {
-  // Expect two strings not to be equal.
-  EXPECT_STRNE("hello", "world");
-  // Expect equality.
-  EXPECT_EQ(7 * 6, 42);
-}
-```
-
-GoogleTest provides [assertions](primer.md#assertions) that you use to test the
-behavior of your code. The above sample includes the main GoogleTest header file
-and demonstrates some basic assertions.
-
-To build the code, create a file named `BUILD` in the same directory with the
-following contents:
-
-```
-cc_test(
-  name = "hello_test",
-  size = "small",
-  srcs = ["hello_test.cc"],
-  deps = ["@com_google_googletest//:gtest_main"],
-)
-```
-
-This `cc_test` rule declares the C++ test binary you want to build, and links to
-GoogleTest (`//:gtest_main`) using the prefix you specified in the `WORKSPACE`
-file (`@com_google_googletest`). For more information about Bazel `BUILD` files,
-see the
-[Bazel C++ Tutorial](https://docs.bazel.build/versions/main/tutorial/cpp.html).
-
-Now you can build and run your test:
-
-my_workspace$ bazel test --test_output=all //:hello_test
-INFO: Analyzed target //:hello_test (26 packages loaded, 362 targets configured).
-INFO: Found 1 test target...
-INFO: From Testing //:hello_test:
-==================== Test output for //:hello_test:
-Running main() from gmock_main.cc
-[==========] Running 1 test from 1 test suite.
-[----------] Global test environment set-up.
-[----------] 1 test from HelloTest
-[ RUN      ] HelloTest.BasicAssertions
-[       OK ] HelloTest.BasicAssertions (0 ms)
-[----------] 1 test from HelloTest (0 ms total)
-
-[----------] Global test environment tear-down
-[==========] 1 test from 1 test suite ran. (0 ms total)
-[  PASSED  ] 1 test.
-================================================================================
-Target //:hello_test up-to-date:
-  bazel-bin/hello_test
-INFO: Elapsed time: 4.190s, Critical Path: 3.05s
-INFO: 27 processes: 8 internal, 19 linux-sandbox.
-INFO: Build completed successfully, 27 total actions
-//:hello_test                                                     PASSED in 0.1s
-
-INFO: Build completed successfully, 27 total actions
-
- -Congratulations! You've successfully built and run a test binary using -GoogleTest. - -## Next steps - -* [Check out the Primer](primer.md) to start learning how to write simple - tests. -* [See the code samples](samples.md) for more examples showing how to use a - variety of GoogleTest features. diff --git a/3rdparty/googletest-1.13.0/docs/quickstart-cmake.md b/3rdparty/googletest-1.13.0/docs/quickstart-cmake.md deleted file mode 100644 index 5abe50441294bd3183c3d1d9f1934f7fea03f88f..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/quickstart-cmake.md +++ /dev/null @@ -1,156 +0,0 @@ -# Quickstart: Building with CMake - -This tutorial aims to get you up and running with GoogleTest using CMake. If -you're using GoogleTest for the first time or need a refresher, we recommend -this tutorial as a starting point. If your project uses Bazel, see the -[Quickstart for Bazel](quickstart-bazel.md) instead. - -## Prerequisites - -To complete this tutorial, you'll need: - -* A compatible operating system (e.g. Linux, macOS, Windows). -* A compatible C++ compiler that supports at least C++14. -* [CMake](https://cmake.org/) and a compatible build tool for building the - project. - * Compatible build tools include - [Make](https://www.gnu.org/software/make/), - [Ninja](https://ninja-build.org/), and others - see - [CMake Generators](https://cmake.org/cmake/help/latest/manual/cmake-generators.7.html) - for more information. - -See [Supported Platforms](platforms.md) for more information about platforms -compatible with GoogleTest. - -If you don't already have CMake installed, see the -[CMake installation guide](https://cmake.org/install). - -{: .callout .note} -Note: The terminal commands in this tutorial show a Unix shell prompt, but the -commands work on the Windows command line as well. - -## Set up a project - -CMake uses a file named `CMakeLists.txt` to configure the build system for a -project. You'll use this file to set up your project and declare a dependency on -GoogleTest. - -First, create a directory for your project: - -``` -$ mkdir my_project && cd my_project -``` - -Next, you'll create the `CMakeLists.txt` file and declare a dependency on -GoogleTest. There are many ways to express dependencies in the CMake ecosystem; -in this quickstart, you'll use the -[`FetchContent` CMake module](https://cmake.org/cmake/help/latest/module/FetchContent.html). -To do this, in your project directory (`my_project`), create a file named -`CMakeLists.txt` with the following contents: - -```cmake -cmake_minimum_required(VERSION 3.14) -project(my_project) - -# GoogleTest requires at least C++14 -set(CMAKE_CXX_STANDARD 14) - -include(FetchContent) -FetchContent_Declare( - googletest - URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip -) -# For Windows: Prevent overriding the parent project's compiler/linker settings -set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -FetchContent_MakeAvailable(googletest) -``` - -The above configuration declares a dependency on GoogleTest which is downloaded -from GitHub. In the above example, `03597a01ee50ed33e9dfd640b249b4be3799d395` is -the Git commit hash of the GoogleTest version to use; we recommend updating the -hash often to point to the latest version. - -For more information about how to create `CMakeLists.txt` files, see the -[CMake Tutorial](https://cmake.org/cmake/help/latest/guide/tutorial/index.html). 
-
-## Create and run a binary
-
-With GoogleTest declared as a dependency, you can use GoogleTest code within
-your own project.
-
-As an example, create a file named `hello_test.cc` in your `my_project`
-directory with the following contents:
-
-```cpp
-#include <gtest/gtest.h>
-
-// Demonstrate some basic assertions.
-TEST(HelloTest, BasicAssertions) {
-  // Expect two strings not to be equal.
-  EXPECT_STRNE("hello", "world");
-  // Expect equality.
-  EXPECT_EQ(7 * 6, 42);
-}
-```
-
-GoogleTest provides [assertions](primer.md#assertions) that you use to test the
-behavior of your code. The above sample includes the main GoogleTest header file
-and demonstrates some basic assertions.
-
-To build the code, add the following to the end of your `CMakeLists.txt` file:
-
-```cmake
-enable_testing()
-
-add_executable(
-  hello_test
-  hello_test.cc
-)
-target_link_libraries(
-  hello_test
-  GTest::gtest_main
-)
-
-include(GoogleTest)
-gtest_discover_tests(hello_test)
-```
-
-The above configuration enables testing in CMake, declares the C++ test binary
-you want to build (`hello_test`), and links it to GoogleTest (`gtest_main`). The
-last two lines enable CMake's test runner to discover the tests included in the
-binary, using the
-[`GoogleTest` CMake module](https://cmake.org/cmake/help/git-stage/module/GoogleTest.html).
-
-Now you can build and run your test:
-
-my_project$ cmake -S . -B build
--- The C compiler identification is GNU 10.2.1
--- The CXX compiler identification is GNU 10.2.1
-...
--- Build files have been written to: .../my_project/build
-
-my_project$ cmake --build build
-Scanning dependencies of target gtest
-...
-[100%] Built target gmock_main
-
-my_project$ cd build && ctest
-Test project .../my_project/build
-    Start 1: HelloTest.BasicAssertions
-1/1 Test #1: HelloTest.BasicAssertions ........   Passed    0.00 sec
-
-100% tests passed, 0 tests failed out of 1
-
-Total Test time (real) =   0.01 sec
-
- -Congratulations! You've successfully built and run a test binary using -GoogleTest. - -## Next steps - -* [Check out the Primer](primer.md) to start learning how to write simple - tests. -* [See the code samples](samples.md) for more examples showing how to use a - variety of GoogleTest features. diff --git a/3rdparty/googletest-1.13.0/docs/reference/actions.md b/3rdparty/googletest-1.13.0/docs/reference/actions.md deleted file mode 100644 index ab81a129eff692d513b27c155abed96dd30f8db6..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/reference/actions.md +++ /dev/null @@ -1,115 +0,0 @@ -# Actions Reference - -[**Actions**](../gmock_for_dummies.md#actions-what-should-it-do) specify what a -mock function should do when invoked. This page lists the built-in actions -provided by GoogleTest. All actions are defined in the `::testing` namespace. - -## Returning a Value - -| Action | Description | -| :-------------------------------- | :-------------------------------------------- | -| `Return()` | Return from a `void` mock function. | -| `Return(value)` | Return `value`. If the type of `value` is different to the mock function's return type, `value` is converted to the latter type at the time the expectation is set, not when the action is executed. | -| `ReturnArg()` | Return the `N`-th (0-based) argument. | -| `ReturnNew(a1, ..., ak)` | Return `new T(a1, ..., ak)`; a different object is created each time. | -| `ReturnNull()` | Return a null pointer. | -| `ReturnPointee(ptr)` | Return the value pointed to by `ptr`. | -| `ReturnRef(variable)` | Return a reference to `variable`. | -| `ReturnRefOfCopy(value)` | Return a reference to a copy of `value`; the copy lives as long as the action. | -| `ReturnRoundRobin({a1, ..., ak})` | Each call will return the next `ai` in the list, starting at the beginning when the end of the list is reached. | - -## Side Effects - -| Action | Description | -| :--------------------------------- | :-------------------------------------- | -| `Assign(&variable, value)` | Assign `value` to variable. | -| `DeleteArg()` | Delete the `N`-th (0-based) argument, which must be a pointer. | -| `SaveArg(pointer)` | Save the `N`-th (0-based) argument to `*pointer`. | -| `SaveArgPointee(pointer)` | Save the value pointed to by the `N`-th (0-based) argument to `*pointer`. | -| `SetArgReferee(value)` | Assign `value` to the variable referenced by the `N`-th (0-based) argument. | -| `SetArgPointee(value)` | Assign `value` to the variable pointed by the `N`-th (0-based) argument. | -| `SetArgumentPointee(value)` | Same as `SetArgPointee(value)`. Deprecated. Will be removed in v1.7.0. | -| `SetArrayArgument(first, last)` | Copies the elements in source range [`first`, `last`) to the array pointed to by the `N`-th (0-based) argument, which can be either a pointer or an iterator. The action does not take ownership of the elements in the source range. | -| `SetErrnoAndReturn(error, value)` | Set `errno` to `error` and return `value`. | -| `Throw(exception)` | Throws the given exception, which can be any copyable value. Available since v1.1.0. | - -## Using a Function, Functor, or Lambda as an Action - -In the following, by "callable" we mean a free function, `std::function`, -functor, or lambda. - -| Action | Description | -| :---------------------------------- | :------------------------------------- | -| `f` | Invoke `f` with the arguments passed to the mock function, where `f` is a callable. 
| -| `Invoke(f)` | Invoke `f` with the arguments passed to the mock function, where `f` can be a global/static function or a functor. | -| `Invoke(object_pointer, &class::method)` | Invoke the method on the object with the arguments passed to the mock function. | -| `InvokeWithoutArgs(f)` | Invoke `f`, which can be a global/static function or a functor. `f` must take no arguments. | -| `InvokeWithoutArgs(object_pointer, &class::method)` | Invoke the method on the object, which takes no arguments. | -| `InvokeArgument(arg1, arg2, ..., argk)` | Invoke the mock function's `N`-th (0-based) argument, which must be a function or a functor, with the `k` arguments. | - -The return value of the invoked function is used as the return value of the -action. - -When defining a callable to be used with `Invoke*()`, you can declare any unused -parameters as `Unused`: - -```cpp -using ::testing::Invoke; -double Distance(Unused, double x, double y) { return sqrt(x*x + y*y); } -... -EXPECT_CALL(mock, Foo("Hi", _, _)).WillOnce(Invoke(Distance)); -``` - -`Invoke(callback)` and `InvokeWithoutArgs(callback)` take ownership of -`callback`, which must be permanent. The type of `callback` must be a base -callback type instead of a derived one, e.g. - -```cpp - BlockingClosure* done = new BlockingClosure; - ... Invoke(done) ...; // This won't compile! - - Closure* done2 = new BlockingClosure; - ... Invoke(done2) ...; // This works. -``` - -In `InvokeArgument(...)`, if an argument needs to be passed by reference, -wrap it inside `std::ref()`. For example, - -```cpp -using ::testing::InvokeArgument; -... -InvokeArgument<2>(5, string("Hi"), std::ref(foo)) -``` - -calls the mock function's #2 argument, passing to it `5` and `string("Hi")` by -value, and `foo` by reference. - -## Default Action - -| Action | Description | -| :------------ | :----------------------------------------------------- | -| `DoDefault()` | Do the default action (specified by `ON_CALL()` or the built-in one). | - -{: .callout .note} -**Note:** due to technical reasons, `DoDefault()` cannot be used inside a -composite action - trying to do so will result in a run-time error. - -## Composite Actions - -| Action | Description | -| :----------------------------- | :------------------------------------------ | -| `DoAll(a1, a2, ..., an)` | Do all actions `a1` to `an` and return the result of `an` in each invocation. The first `n - 1` sub-actions must return void and will receive a readonly view of the arguments. | -| `IgnoreResult(a)` | Perform action `a` and ignore its result. `a` must not return void. | -| `WithArg(a)` | Pass the `N`-th (0-based) argument of the mock function to action `a` and perform it. | -| `WithArgs(a)` | Pass the selected (0-based) arguments of the mock function to action `a` and perform it. | -| `WithoutArgs(a)` | Perform action `a` without any arguments. | - -## Defining Actions - -| Macro | Description | -| :--------------------------------- | :-------------------------------------- | -| `ACTION(Sum) { return arg0 + arg1; }` | Defines an action `Sum()` to return the sum of the mock function's argument #0 and #1. | -| `ACTION_P(Plus, n) { return arg0 + n; }` | Defines an action `Plus(n)` to return the sum of the mock function's argument #0 and `n`. | -| `ACTION_Pk(Foo, p1, ..., pk) { statements; }` | Defines a parameterized action `Foo(p1, ..., pk)` to execute the given `statements`. | - -The `ACTION*` macros cannot be used inside a function or class. 
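-
-As a rough illustration (the names below are hypothetical and not part of the
-GoogleTest API), an action defined with `ACTION_P` at namespace scope can then
-be used like any built-in action inside an expectation:
-
-```cpp
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-
-using ::testing::_;
-
-// Returns the mock function's first argument plus the bound parameter n.
-ACTION_P(PlusN, n) { return arg0 + n; }
-
-class Adder {
- public:
-  virtual ~Adder() = default;
-  virtual int Add(int x) = 0;
-};
-
-class MockAdder : public Adder {
- public:
-  MOCK_METHOD(int, Add, (int x), (override));
-};
-
-TEST(ActionMacroTest, UsesParameterizedAction) {
-  MockAdder adder;
-  EXPECT_CALL(adder, Add(_)).WillOnce(PlusN(5));
-  EXPECT_EQ(adder.Add(2), 7);  // arg0 (2) + n (5)
-}
-```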
diff --git a/3rdparty/googletest-1.13.0/docs/reference/assertions.md b/3rdparty/googletest-1.13.0/docs/reference/assertions.md deleted file mode 100644 index 7bf03a3dde17857dfe7b508f14daa60d73bdac19..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/reference/assertions.md +++ /dev/null @@ -1,633 +0,0 @@ -# Assertions Reference - -This page lists the assertion macros provided by GoogleTest for verifying code -behavior. To use them, include the header `gtest/gtest.h`. - -The majority of the macros listed below come as a pair with an `EXPECT_` variant -and an `ASSERT_` variant. Upon failure, `EXPECT_` macros generate nonfatal -failures and allow the current function to continue running, while `ASSERT_` -macros generate fatal failures and abort the current function. - -All assertion macros support streaming a custom failure message into them with -the `<<` operator, for example: - -```cpp -EXPECT_TRUE(my_condition) << "My condition is not true"; -``` - -Anything that can be streamed to an `ostream` can be streamed to an assertion -macroβ€”in particular, C strings and string objects. If a wide string (`wchar_t*`, -`TCHAR*` in `UNICODE` mode on Windows, or `std::wstring`) is streamed to an -assertion, it will be translated to UTF-8 when printed. - -## Explicit Success and Failure {#success-failure} - -The assertions in this section generate a success or failure directly instead of -testing a value or expression. These are useful when control flow, rather than a -Boolean expression, determines the test's success or failure, as shown by the -following example: - -```c++ -switch(expression) { - case 1: - ... some checks ... - case 2: - ... some other checks ... - default: - FAIL() << "We shouldn't get here."; -} -``` - -### SUCCEED {#SUCCEED} - -`SUCCEED()` - -Generates a success. This *does not* make the overall test succeed. A test is -considered successful only if none of its assertions fail during its execution. - -The `SUCCEED` assertion is purely documentary and currently doesn't generate any -user-visible output. However, we may add `SUCCEED` messages to GoogleTest output -in the future. - -### FAIL {#FAIL} - -`FAIL()` - -Generates a fatal failure, which returns from the current function. - -Can only be used in functions that return `void`. See -[Assertion Placement](../advanced.md#assertion-placement) for more information. - -### ADD_FAILURE {#ADD_FAILURE} - -`ADD_FAILURE()` - -Generates a nonfatal failure, which allows the current function to continue -running. - -### ADD_FAILURE_AT {#ADD_FAILURE_AT} - -`ADD_FAILURE_AT(`*`file_path`*`,`*`line_number`*`)` - -Generates a nonfatal failure at the file and line number specified. - -## Generalized Assertion {#generalized} - -The following assertion allows [matchers](matchers.md) to be used to verify -values. - -### EXPECT_THAT {#EXPECT_THAT} - -`EXPECT_THAT(`*`value`*`,`*`matcher`*`)` \ -`ASSERT_THAT(`*`value`*`,`*`matcher`*`)` - -Verifies that *`value`* matches the [matcher](matchers.md) *`matcher`*. - -For example, the following code verifies that the string `value1` starts with -`"Hello"`, `value2` matches a regular expression, and `value3` is between 5 and -10: - -```cpp -#include "gmock/gmock.h" - -using ::testing::AllOf; -using ::testing::Gt; -using ::testing::Lt; -using ::testing::MatchesRegex; -using ::testing::StartsWith; - -... 
-EXPECT_THAT(value1, StartsWith("Hello")); -EXPECT_THAT(value2, MatchesRegex("Line \\d+")); -ASSERT_THAT(value3, AllOf(Gt(5), Lt(10))); -``` - -Matchers enable assertions of this form to read like English and generate -informative failure messages. For example, if the above assertion on `value1` -fails, the resulting message will be similar to the following: - -``` -Value of: value1 - Actual: "Hi, world!" -Expected: starts with "Hello" -``` - -GoogleTest provides a built-in library of matchersβ€”see the -[Matchers Reference](matchers.md). It is also possible to write your own -matchersβ€”see [Writing New Matchers Quickly](../gmock_cook_book.md#NewMatchers). -The use of matchers makes `EXPECT_THAT` a powerful, extensible assertion. - -*The idea for this assertion was borrowed from Joe Walnes' Hamcrest project, -which adds `assertThat()` to JUnit.* - -## Boolean Conditions {#boolean} - -The following assertions test Boolean conditions. - -### EXPECT_TRUE {#EXPECT_TRUE} - -`EXPECT_TRUE(`*`condition`*`)` \ -`ASSERT_TRUE(`*`condition`*`)` - -Verifies that *`condition`* is true. - -### EXPECT_FALSE {#EXPECT_FALSE} - -`EXPECT_FALSE(`*`condition`*`)` \ -`ASSERT_FALSE(`*`condition`*`)` - -Verifies that *`condition`* is false. - -## Binary Comparison {#binary-comparison} - -The following assertions compare two values. The value arguments must be -comparable by the assertion's comparison operator, otherwise a compiler error -will result. - -If an argument supports the `<<` operator, it will be called to print the -argument when the assertion fails. Otherwise, GoogleTest will attempt to print -them in the best way it canβ€”see -[Teaching GoogleTest How to Print Your Values](../advanced.md#teaching-googletest-how-to-print-your-values). - -Arguments are always evaluated exactly once, so it's OK for the arguments to -have side effects. However, the argument evaluation order is undefined and -programs should not depend on any particular argument evaluation order. - -These assertions work with both narrow and wide string objects (`string` and -`wstring`). - -See also the [Floating-Point Comparison](#floating-point) assertions to compare -floating-point numbers and avoid problems caused by rounding. - -### EXPECT_EQ {#EXPECT_EQ} - -`EXPECT_EQ(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_EQ(`*`val1`*`,`*`val2`*`)` - -Verifies that *`val1`*`==`*`val2`*. - -Does pointer equality on pointers. If used on two C strings, it tests if they -are in the same memory location, not if they have the same value. Use -[`EXPECT_STREQ`](#EXPECT_STREQ) to compare C strings (e.g. `const char*`) by -value. - -When comparing a pointer to `NULL`, use `EXPECT_EQ(`*`ptr`*`, nullptr)` instead -of `EXPECT_EQ(`*`ptr`*`, NULL)`. - -### EXPECT_NE {#EXPECT_NE} - -`EXPECT_NE(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_NE(`*`val1`*`,`*`val2`*`)` - -Verifies that *`val1`*`!=`*`val2`*. - -Does pointer equality on pointers. If used on two C strings, it tests if they -are in different memory locations, not if they have different values. Use -[`EXPECT_STRNE`](#EXPECT_STRNE) to compare C strings (e.g. `const char*`) by -value. - -When comparing a pointer to `NULL`, use `EXPECT_NE(`*`ptr`*`, nullptr)` instead -of `EXPECT_NE(`*`ptr`*`, NULL)`. - -### EXPECT_LT {#EXPECT_LT} - -`EXPECT_LT(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_LT(`*`val1`*`,`*`val2`*`)` - -Verifies that *`val1`*`<`*`val2`*. - -### EXPECT_LE {#EXPECT_LE} - -`EXPECT_LE(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_LE(`*`val1`*`,`*`val2`*`)` - -Verifies that *`val1`*`<=`*`val2`*. 
- -### EXPECT_GT {#EXPECT_GT} - -`EXPECT_GT(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_GT(`*`val1`*`,`*`val2`*`)` - -Verifies that *`val1`*`>`*`val2`*. - -### EXPECT_GE {#EXPECT_GE} - -`EXPECT_GE(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_GE(`*`val1`*`,`*`val2`*`)` - -Verifies that *`val1`*`>=`*`val2`*. - -## String Comparison {#c-strings} - -The following assertions compare two **C strings**. To compare two `string` -objects, use [`EXPECT_EQ`](#EXPECT_EQ) or [`EXPECT_NE`](#EXPECT_NE) instead. - -These assertions also accept wide C strings (`wchar_t*`). If a comparison of two -wide strings fails, their values will be printed as UTF-8 narrow strings. - -To compare a C string with `NULL`, use `EXPECT_EQ(`*`c_string`*`, nullptr)` or -`EXPECT_NE(`*`c_string`*`, nullptr)`. - -### EXPECT_STREQ {#EXPECT_STREQ} - -`EXPECT_STREQ(`*`str1`*`,`*`str2`*`)` \ -`ASSERT_STREQ(`*`str1`*`,`*`str2`*`)` - -Verifies that the two C strings *`str1`* and *`str2`* have the same contents. - -### EXPECT_STRNE {#EXPECT_STRNE} - -`EXPECT_STRNE(`*`str1`*`,`*`str2`*`)` \ -`ASSERT_STRNE(`*`str1`*`,`*`str2`*`)` - -Verifies that the two C strings *`str1`* and *`str2`* have different contents. - -### EXPECT_STRCASEEQ {#EXPECT_STRCASEEQ} - -`EXPECT_STRCASEEQ(`*`str1`*`,`*`str2`*`)` \ -`ASSERT_STRCASEEQ(`*`str1`*`,`*`str2`*`)` - -Verifies that the two C strings *`str1`* and *`str2`* have the same contents, -ignoring case. - -### EXPECT_STRCASENE {#EXPECT_STRCASENE} - -`EXPECT_STRCASENE(`*`str1`*`,`*`str2`*`)` \ -`ASSERT_STRCASENE(`*`str1`*`,`*`str2`*`)` - -Verifies that the two C strings *`str1`* and *`str2`* have different contents, -ignoring case. - -## Floating-Point Comparison {#floating-point} - -The following assertions compare two floating-point values. - -Due to rounding errors, it is very unlikely that two floating-point values will -match exactly, so `EXPECT_EQ` is not suitable. In general, for floating-point -comparison to make sense, the user needs to carefully choose the error bound. - -GoogleTest also provides assertions that use a default error bound based on -Units in the Last Place (ULPs). To learn more about ULPs, see the article -[Comparing Floating Point Numbers](https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/). - -### EXPECT_FLOAT_EQ {#EXPECT_FLOAT_EQ} - -`EXPECT_FLOAT_EQ(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_FLOAT_EQ(`*`val1`*`,`*`val2`*`)` - -Verifies that the two `float` values *`val1`* and *`val2`* are approximately -equal, to within 4 ULPs from each other. - -### EXPECT_DOUBLE_EQ {#EXPECT_DOUBLE_EQ} - -`EXPECT_DOUBLE_EQ(`*`val1`*`,`*`val2`*`)` \ -`ASSERT_DOUBLE_EQ(`*`val1`*`,`*`val2`*`)` - -Verifies that the two `double` values *`val1`* and *`val2`* are approximately -equal, to within 4 ULPs from each other. - -### EXPECT_NEAR {#EXPECT_NEAR} - -`EXPECT_NEAR(`*`val1`*`,`*`val2`*`,`*`abs_error`*`)` \ -`ASSERT_NEAR(`*`val1`*`,`*`val2`*`,`*`abs_error`*`)` - -Verifies that the difference between *`val1`* and *`val2`* does not exceed the -absolute error bound *`abs_error`*. - -## Exception Assertions {#exceptions} - -The following assertions verify that a piece of code throws, or does not throw, -an exception. Usage requires exceptions to be enabled in the build environment. 
-
-Note that the piece of code under test can be a compound statement, for example:
-
-```cpp
-EXPECT_NO_THROW({
-  int n = 5;
-  DoSomething(&n);
-});
-```
-
-### EXPECT_THROW {#EXPECT_THROW}
-
-`EXPECT_THROW(`*`statement`*`,`*`exception_type`*`)` \
-`ASSERT_THROW(`*`statement`*`,`*`exception_type`*`)`
-
-Verifies that *`statement`* throws an exception of type *`exception_type`*.
-
-### EXPECT_ANY_THROW {#EXPECT_ANY_THROW}
-
-`EXPECT_ANY_THROW(`*`statement`*`)` \
-`ASSERT_ANY_THROW(`*`statement`*`)`
-
-Verifies that *`statement`* throws an exception of any type.
-
-### EXPECT_NO_THROW {#EXPECT_NO_THROW}
-
-`EXPECT_NO_THROW(`*`statement`*`)` \
-`ASSERT_NO_THROW(`*`statement`*`)`
-
-Verifies that *`statement`* does not throw any exception.
-
-## Predicate Assertions {#predicates}
-
-The following assertions enable more complex predicates to be verified while
-printing a more clear failure message than if `EXPECT_TRUE` were used alone.
-
-### EXPECT_PRED* {#EXPECT_PRED}
-
-`EXPECT_PRED1(`*`pred`*`,`*`val1`*`)` \
-`EXPECT_PRED2(`*`pred`*`,`*`val1`*`,`*`val2`*`)` \
-`EXPECT_PRED3(`*`pred`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`)` \
-`EXPECT_PRED4(`*`pred`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`)` \
-`EXPECT_PRED5(`*`pred`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`,`*`val5`*`)`
-
-`ASSERT_PRED1(`*`pred`*`,`*`val1`*`)` \
-`ASSERT_PRED2(`*`pred`*`,`*`val1`*`,`*`val2`*`)` \
-`ASSERT_PRED3(`*`pred`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`)` \
-`ASSERT_PRED4(`*`pred`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`)` \
-`ASSERT_PRED5(`*`pred`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`,`*`val5`*`)`
-
-Verifies that the predicate *`pred`* returns `true` when passed the given values
-as arguments.
-
-The parameter *`pred`* is a function or functor that accepts as many arguments
-as the corresponding macro accepts values. If *`pred`* returns `true` for the
-given arguments, the assertion succeeds, otherwise the assertion fails.
-
-When the assertion fails, it prints the value of each argument. Arguments are
-always evaluated exactly once.
-
-As an example, see the following code:
-
-```cpp
-// Returns true if m and n have no common divisors except 1.
-bool MutuallyPrime(int m, int n) { ... }
-...
-const int a = 3;
-const int b = 4;
-const int c = 10;
-...
-EXPECT_PRED2(MutuallyPrime, a, b);  // Succeeds
-EXPECT_PRED2(MutuallyPrime, b, c);  // Fails
-```
-
-In the above example, the first assertion succeeds, and the second fails with
-the following message:
-
-```
-MutuallyPrime(b, c) is false, where
-b is 4
-c is 10
-```
-
-Note that if the given predicate is an overloaded function or a function
-template, the assertion macro might not be able to determine which version to
-use, and it might be necessary to explicitly specify the type of the function.
-For example, for a Boolean function `IsPositive()` overloaded to take either a
-single `int` or `double` argument, it would be necessary to write one of the
-following:
-
-```cpp
-EXPECT_PRED1(static_cast<bool (*)(int)>(IsPositive), 5);
-EXPECT_PRED1(static_cast<bool (*)(double)>(IsPositive), 3.14);
-```
-
-Writing simply `EXPECT_PRED1(IsPositive, 5);` would result in a compiler error.
-Similarly, to use a template function, specify the template arguments:
-
-```cpp
-template <typename T>
-bool IsNegative(T x) {
-  return x < 0;
-}
-...
-EXPECT_PRED1(IsNegative, -5); // Must specify type for IsNegative -``` - -If a template has multiple parameters, wrap the predicate in parentheses so the -macro arguments are parsed correctly: - -```cpp -ASSERT_PRED2((MyPredicate), 5, 0); -``` - -### EXPECT_PRED_FORMAT* {#EXPECT_PRED_FORMAT} - -`EXPECT_PRED_FORMAT1(`*`pred_formatter`*`,`*`val1`*`)` \ -`EXPECT_PRED_FORMAT2(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`)` \ -`EXPECT_PRED_FORMAT3(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`)` \ -`EXPECT_PRED_FORMAT4(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`)` -\ -`EXPECT_PRED_FORMAT5(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`,`*`val5`*`)` - -`ASSERT_PRED_FORMAT1(`*`pred_formatter`*`,`*`val1`*`)` \ -`ASSERT_PRED_FORMAT2(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`)` \ -`ASSERT_PRED_FORMAT3(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`)` \ -`ASSERT_PRED_FORMAT4(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`)` -\ -`ASSERT_PRED_FORMAT5(`*`pred_formatter`*`,`*`val1`*`,`*`val2`*`,`*`val3`*`,`*`val4`*`,`*`val5`*`)` - -Verifies that the predicate *`pred_formatter`* succeeds when passed the given -values as arguments. - -The parameter *`pred_formatter`* is a *predicate-formatter*, which is a function -or functor with the signature: - -```cpp -testing::AssertionResult PredicateFormatter(const char* expr1, - const char* expr2, - ... - const char* exprn, - T1 val1, - T2 val2, - ... - Tn valn); -``` - -where *`val1`*, *`val2`*, ..., *`valn`* are the values of the predicate -arguments, and *`expr1`*, *`expr2`*, ..., *`exprn`* are the corresponding -expressions as they appear in the source code. The types `T1`, `T2`, ..., `Tn` -can be either value types or reference types; if an argument has type `T`, it -can be declared as either `T` or `const T&`, whichever is appropriate. For more -about the return type `testing::AssertionResult`, see -[Using a Function That Returns an AssertionResult](../advanced.md#using-a-function-that-returns-an-assertionresult). - -As an example, see the following code: - -```cpp -// Returns the smallest prime common divisor of m and n, -// or 1 when m and n are mutually prime. -int SmallestPrimeCommonDivisor(int m, int n) { ... } - -// Returns true if m and n have no common divisors except 1. -bool MutuallyPrime(int m, int n) { ... } - -// A predicate-formatter for asserting that two integers are mutually prime. -testing::AssertionResult AssertMutuallyPrime(const char* m_expr, - const char* n_expr, - int m, - int n) { - if (MutuallyPrime(m, n)) return testing::AssertionSuccess(); - - return testing::AssertionFailure() << m_expr << " and " << n_expr - << " (" << m << " and " << n << ") are not mutually prime, " - << "as they have a common divisor " << SmallestPrimeCommonDivisor(m, n); -} - -... -const int a = 3; -const int b = 4; -const int c = 10; -... -EXPECT_PRED_FORMAT2(AssertMutuallyPrime, a, b); // Succeeds -EXPECT_PRED_FORMAT2(AssertMutuallyPrime, b, c); // Fails -``` - -In the above example, the final assertion fails and the predicate-formatter -produces the following failure message: - -``` -b and c (4 and 10) are not mutually prime, as they have a common divisor 2 -``` - -## Windows HRESULT Assertions {#HRESULT} - -The following assertions test for `HRESULT` success or failure. 
For example:

```cpp
CComPtr<IShellDispatch2> shell;
ASSERT_HRESULT_SUCCEEDED(shell.CoCreateInstance(L"Shell.Application"));
CComVariant empty;
ASSERT_HRESULT_SUCCEEDED(shell->ShellExecute(CComBSTR(url), empty, empty, empty, empty));
```

The generated output contains the human-readable error message associated with
the returned `HRESULT` code.

### EXPECT_HRESULT_SUCCEEDED {#EXPECT_HRESULT_SUCCEEDED}

`EXPECT_HRESULT_SUCCEEDED(`*`expression`*`)` \
`ASSERT_HRESULT_SUCCEEDED(`*`expression`*`)`

Verifies that *`expression`* is a success `HRESULT`.

### EXPECT_HRESULT_FAILED {#EXPECT_HRESULT_FAILED}

`EXPECT_HRESULT_FAILED(`*`expression`*`)` \
`ASSERT_HRESULT_FAILED(`*`expression`*`)`

Verifies that *`expression`* is a failure `HRESULT`.

## Death Assertions {#death}

The following assertions verify that a piece of code causes the process to
terminate. For context, see [Death Tests](../advanced.md#death-tests).

These assertions spawn a new process and execute the code under test in that
process. How that happens depends on the platform and the variable
`::testing::GTEST_FLAG(death_test_style)`, which is initialized from the
command-line flag `--gtest_death_test_style`.

*   On POSIX systems, `fork()` (or `clone()` on Linux) is used to spawn the
    child, after which:
    *   If the variable's value is `"fast"`, the death test statement is
        immediately executed.
    *   If the variable's value is `"threadsafe"`, the child process re-executes
        the unit test binary just as it was originally invoked, but with some
        extra flags to cause just the single death test under consideration to
        be run.
*   On Windows, the child is spawned using the `CreateProcess()` API, and
    re-executes the binary to cause just the single death test under
    consideration to be run - much like the `"threadsafe"` mode on POSIX.

Other values for the variable are illegal and will cause the death test to fail.
Currently, the flag's default value is **`"fast"`**.

If the death test statement runs to completion without dying, the child process
will nonetheless terminate, and the assertion fails.

Note that the piece of code under test can be a compound statement, for example:

```cpp
EXPECT_DEATH({
  int n = 5;
  DoSomething(&n);
}, "Error on line .* of DoSomething()");
```

### EXPECT_DEATH {#EXPECT_DEATH}

`EXPECT_DEATH(`*`statement`*`,`*`matcher`*`)` \
`ASSERT_DEATH(`*`statement`*`,`*`matcher`*`)`

Verifies that *`statement`* causes the process to terminate with a nonzero exit
status and produces `stderr` output that matches *`matcher`*.

The parameter *`matcher`* is either a [matcher](matchers.md) for a `const
std::string&`, or a regular expression (see
[Regular Expression Syntax](../advanced.md#regular-expression-syntax)). A bare
string *`s`* (with no matcher) is treated as
[`ContainsRegex(s)`](matchers.md#string-matchers), **not**
[`Eq(s)`](matchers.md#generic-comparison).

For example, the following code verifies that calling `DoSomething(42)` causes
the process to die with an error message that contains the text `My error`:

```cpp
EXPECT_DEATH(DoSomething(42), "My error");
```

### EXPECT_DEATH_IF_SUPPORTED {#EXPECT_DEATH_IF_SUPPORTED}

`EXPECT_DEATH_IF_SUPPORTED(`*`statement`*`,`*`matcher`*`)` \
`ASSERT_DEATH_IF_SUPPORTED(`*`statement`*`,`*`matcher`*`)`

If death tests are supported, behaves the same as
[`EXPECT_DEATH`](#EXPECT_DEATH). Otherwise, verifies nothing.
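For instance, a test that exercises a fatal precondition check on platforms with death-test support, while still compiling and passing elsewhere, might be sketched as follows; the `CrashIfNull` helper is hypothetical:

```cpp
#include <cstdio>
#include <cstdlib>

#include "gtest/gtest.h"

// Hypothetical helper under test: prints a message to stderr and aborts.
void CrashIfNull(const int* p) {
  if (p == nullptr) {
    std::fprintf(stderr, "CrashIfNull: null pointer\n");
    std::abort();
  }
}

TEST(CrashIfNullDeathTest, DiesOnNull) {
  // On platforms without death-test support this verifies nothing
  // instead of failing the build or reporting a spurious failure.
  EXPECT_DEATH_IF_SUPPORTED(CrashIfNull(nullptr), "null pointer");
}
```

Naming the test suite with a `DeathTest` suffix, as above, follows the GoogleTest convention that causes death tests to run before other tests.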
- -### EXPECT_DEBUG_DEATH {#EXPECT_DEBUG_DEATH} - -`EXPECT_DEBUG_DEATH(`*`statement`*`,`*`matcher`*`)` \ -`ASSERT_DEBUG_DEATH(`*`statement`*`,`*`matcher`*`)` - -In debug mode, behaves the same as [`EXPECT_DEATH`](#EXPECT_DEATH). When not in -debug mode (i.e. `NDEBUG` is defined), just executes *`statement`*. - -### EXPECT_EXIT {#EXPECT_EXIT} - -`EXPECT_EXIT(`*`statement`*`,`*`predicate`*`,`*`matcher`*`)` \ -`ASSERT_EXIT(`*`statement`*`,`*`predicate`*`,`*`matcher`*`)` - -Verifies that *`statement`* causes the process to terminate with an exit status -that satisfies *`predicate`*, and produces `stderr` output that matches -*`matcher`*. - -The parameter *`predicate`* is a function or functor that accepts an `int` exit -status and returns a `bool`. GoogleTest provides two predicates to handle common -cases: - -```cpp -// Returns true if the program exited normally with the given exit status code. -::testing::ExitedWithCode(exit_code); - -// Returns true if the program was killed by the given signal. -// Not available on Windows. -::testing::KilledBySignal(signal_number); -``` - -The parameter *`matcher`* is either a [matcher](matchers.md) for a `const -std::string&`, or a regular expression (see -[Regular Expression Syntax](../advanced.md#regular-expression-syntax))β€”a bare -string *`s`* (with no matcher) is treated as -[`ContainsRegex(s)`](matchers.md#string-matchers), **not** -[`Eq(s)`](matchers.md#generic-comparison). - -For example, the following code verifies that calling `NormalExit()` causes the -process to print a message containing the text `Success` to `stderr` and exit -with exit status code 0: - -```cpp -EXPECT_EXIT(NormalExit(), testing::ExitedWithCode(0), "Success"); -``` diff --git a/3rdparty/googletest-1.13.0/docs/reference/matchers.md b/3rdparty/googletest-1.13.0/docs/reference/matchers.md deleted file mode 100644 index 9fb159275131504ec920303268297a373502ab49..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/reference/matchers.md +++ /dev/null @@ -1,290 +0,0 @@ -# Matchers Reference - -A **matcher** matches a *single* argument. You can use it inside `ON_CALL()` or -`EXPECT_CALL()`, or use it to validate a value directly using two macros: - -| Macro | Description | -| :----------------------------------- | :------------------------------------ | -| `EXPECT_THAT(actual_value, matcher)` | Asserts that `actual_value` matches `matcher`. | -| `ASSERT_THAT(actual_value, matcher)` | The same as `EXPECT_THAT(actual_value, matcher)`, except that it generates a **fatal** failure. | - -{: .callout .warning} -**WARNING:** Equality matching via `EXPECT_THAT(actual_value, expected_value)` -is supported, however note that implicit conversions can cause surprising -results. For example, `EXPECT_THAT(some_bool, "some string")` will compile and -may pass unintentionally. - -**BEST PRACTICE:** Prefer to make the comparison explicit via -`EXPECT_THAT(actual_value, Eq(expected_value))` or `EXPECT_EQ(actual_value, -expected_value)`. - -Built-in matchers (where `argument` is the function argument, e.g. -`actual_value` in the example above, or when used in the context of -`EXPECT_CALL(mock_object, method(matchers))`, the arguments of `method`) are -divided into several categories. All matchers are defined in the `::testing` -namespace unless otherwise noted. - -## Wildcard - -Matcher | Description -:-------------------------- | :----------------------------------------------- -`_` | `argument` can be any value of the correct type. 
-`A()` or `An()` | `argument` can be any value of type `type`. - -## Generic Comparison - -| Matcher | Description | -| :--------------------- | :-------------------------------------------------- | -| `Eq(value)` or `value` | `argument == value` | -| `Ge(value)` | `argument >= value` | -| `Gt(value)` | `argument > value` | -| `Le(value)` | `argument <= value` | -| `Lt(value)` | `argument < value` | -| `Ne(value)` | `argument != value` | -| `IsFalse()` | `argument` evaluates to `false` in a Boolean context. | -| `IsTrue()` | `argument` evaluates to `true` in a Boolean context. | -| `IsNull()` | `argument` is a `NULL` pointer (raw or smart). | -| `NotNull()` | `argument` is a non-null pointer (raw or smart). | -| `Optional(m)` | `argument` is `optional<>` that contains a value matching `m`. (For testing whether an `optional<>` is set, check for equality with `nullopt`. You may need to use `Eq(nullopt)` if the inner type doesn't have `==`.)| -| `VariantWith(m)` | `argument` is `variant<>` that holds the alternative of type T with a value matching `m`. | -| `Ref(variable)` | `argument` is a reference to `variable`. | -| `TypedEq(value)` | `argument` has type `type` and is equal to `value`. You may need to use this instead of `Eq(value)` when the mock function is overloaded. | - -Except `Ref()`, these matchers make a *copy* of `value` in case it's modified or -destructed later. If the compiler complains that `value` doesn't have a public -copy constructor, try wrap it in `std::ref()`, e.g. -`Eq(std::ref(non_copyable_value))`. If you do that, make sure -`non_copyable_value` is not changed afterwards, or the meaning of your matcher -will be changed. - -`IsTrue` and `IsFalse` are useful when you need to use a matcher, or for types -that can be explicitly converted to Boolean, but are not implicitly converted to -Boolean. In other cases, you can use the basic -[`EXPECT_TRUE` and `EXPECT_FALSE`](assertions.md#boolean) assertions. - -## Floating-Point Matchers {#FpMatchers} - -| Matcher | Description | -| :------------------------------- | :--------------------------------- | -| `DoubleEq(a_double)` | `argument` is a `double` value approximately equal to `a_double`, treating two NaNs as unequal. | -| `FloatEq(a_float)` | `argument` is a `float` value approximately equal to `a_float`, treating two NaNs as unequal. | -| `NanSensitiveDoubleEq(a_double)` | `argument` is a `double` value approximately equal to `a_double`, treating two NaNs as equal. | -| `NanSensitiveFloatEq(a_float)` | `argument` is a `float` value approximately equal to `a_float`, treating two NaNs as equal. | -| `IsNan()` | `argument` is any floating-point type with a NaN value. | - -The above matchers use ULP-based comparison (the same as used in googletest). -They automatically pick a reasonable error bound based on the absolute value of -the expected value. `DoubleEq()` and `FloatEq()` conform to the IEEE standard, -which requires comparing two NaNs for equality to return false. The -`NanSensitive*` version instead treats two NaNs as equal, which is often what a -user wants. - -| Matcher | Description | -| :------------------------------------------------ | :----------------------- | -| `DoubleNear(a_double, max_abs_error)` | `argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as unequal. | -| `FloatNear(a_float, max_abs_error)` | `argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as unequal. 
| -| `NanSensitiveDoubleNear(a_double, max_abs_error)` | `argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as equal. | -| `NanSensitiveFloatNear(a_float, max_abs_error)` | `argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as equal. | - -## String Matchers - -The `argument` can be either a C string or a C++ string object: - -| Matcher | Description | -| :---------------------- | :------------------------------------------------- | -| `ContainsRegex(string)` | `argument` matches the given regular expression. | -| `EndsWith(suffix)` | `argument` ends with string `suffix`. | -| `HasSubstr(string)` | `argument` contains `string` as a sub-string. | -| `IsEmpty()` | `argument` is an empty string. | -| `MatchesRegex(string)` | `argument` matches the given regular expression with the match starting at the first character and ending at the last character. | -| `StartsWith(prefix)` | `argument` starts with string `prefix`. | -| `StrCaseEq(string)` | `argument` is equal to `string`, ignoring case. | -| `StrCaseNe(string)` | `argument` is not equal to `string`, ignoring case. | -| `StrEq(string)` | `argument` is equal to `string`. | -| `StrNe(string)` | `argument` is not equal to `string`. | -| `WhenBase64Unescaped(m)` | `argument` is a base-64 escaped string whose unescaped string matches `m`. | - -`ContainsRegex()` and `MatchesRegex()` take ownership of the `RE` object. They -use the regular expression syntax defined -[here](../advanced.md#regular-expression-syntax). All of these matchers, except -`ContainsRegex()` and `MatchesRegex()` work for wide strings as well. - -## Container Matchers - -Most STL-style containers support `==`, so you can use `Eq(expected_container)` -or simply `expected_container` to match a container exactly. If you want to -write the elements in-line, match them more flexibly, or get more informative -messages, you can use: - -| Matcher | Description | -| :---------------------------------------- | :------------------------------- | -| `BeginEndDistanceIs(m)` | `argument` is a container whose `begin()` and `end()` iterators are separated by a number of increments matching `m`. E.g. `BeginEndDistanceIs(2)` or `BeginEndDistanceIs(Lt(2))`. For containers that define a `size()` method, `SizeIs(m)` may be more efficient. | -| `ContainerEq(container)` | The same as `Eq(container)` except that the failure message also includes which elements are in one container but not the other. | -| `Contains(e)` | `argument` contains an element that matches `e`, which can be either a value or a matcher. | -| `Contains(e).Times(n)` | `argument` contains elements that match `e`, which can be either a value or a matcher, and the number of matches is `n`, which can be either a value or a matcher. Unlike the plain `Contains` and `Each` this allows to check for arbitrary occurrences including testing for absence with `Contains(e).Times(0)`. | -| `Each(e)` | `argument` is a container where *every* element matches `e`, which can be either a value or a matcher. | -| `ElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, where the *i*-th element matches `ei`, which can be a value or a matcher. 
| -| `ElementsAreArray({e0, e1, ..., en})`, `ElementsAreArray(a_container)`, `ElementsAreArray(begin, end)`, `ElementsAreArray(array)`, or `ElementsAreArray(array, count)` | The same as `ElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, iterator range, or C-style array. | -| `IsEmpty()` | `argument` is an empty container (`container.empty()`). | -| `IsSubsetOf({e0, e1, ..., en})`, `IsSubsetOf(a_container)`, `IsSubsetOf(begin, end)`, `IsSubsetOf(array)`, or `IsSubsetOf(array, count)` | `argument` matches `UnorderedElementsAre(x0, x1, ..., xk)` for some subset `{x0, x1, ..., xk}` of the expected matchers. | -| `IsSupersetOf({e0, e1, ..., en})`, `IsSupersetOf(a_container)`, `IsSupersetOf(begin, end)`, `IsSupersetOf(array)`, or `IsSupersetOf(array, count)` | Some subset of `argument` matches `UnorderedElementsAre(`expected matchers`)`. | -| `Pointwise(m, container)`, `Pointwise(m, {e0, e1, ..., en})` | `argument` contains the same number of elements as in `container`, and for all i, (the i-th element in `argument`, the i-th element in `container`) match `m`, which is a matcher on 2-tuples. E.g. `Pointwise(Le(), upper_bounds)` verifies that each element in `argument` doesn't exceed the corresponding element in `upper_bounds`. See more detail below. | -| `SizeIs(m)` | `argument` is a container whose size matches `m`. E.g. `SizeIs(2)` or `SizeIs(Lt(2))`. | -| `UnorderedElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, and under *some* permutation of the elements, each element matches an `ei` (for a different `i`), which can be a value or a matcher. | -| `UnorderedElementsAreArray({e0, e1, ..., en})`, `UnorderedElementsAreArray(a_container)`, `UnorderedElementsAreArray(begin, end)`, `UnorderedElementsAreArray(array)`, or `UnorderedElementsAreArray(array, count)` | The same as `UnorderedElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, iterator range, or C-style array. | -| `UnorderedPointwise(m, container)`, `UnorderedPointwise(m, {e0, e1, ..., en})` | Like `Pointwise(m, container)`, but ignores the order of elements. | -| `WhenSorted(m)` | When `argument` is sorted using the `<` operator, it matches container matcher `m`. E.g. `WhenSorted(ElementsAre(1, 2, 3))` verifies that `argument` contains elements 1, 2, and 3, ignoring order. | -| `WhenSortedBy(comparator, m)` | The same as `WhenSorted(m)`, except that the given comparator instead of `<` is used to sort `argument`. E.g. `WhenSortedBy(std::greater(), ElementsAre(3, 2, 1))`. | - -**Notes:** - -* These matchers can also match: - 1. a native array passed by reference (e.g. in `Foo(const int (&a)[5])`), - and - 2. an array passed as a pointer and a count (e.g. in `Bar(const T* buffer, - int len)` -- see [Multi-argument Matchers](#MultiArgMatchers)). -* The array being matched may be multi-dimensional (i.e. its elements can be - arrays). -* `m` in `Pointwise(m, ...)` and `UnorderedPointwise(m, ...)` should be a - matcher for `::std::tuple` where `T` and `U` are the element type of - the actual container and the expected container, respectively. For example, - to compare two `Foo` containers where `Foo` doesn't support `operator==`, - one might write: - - ```cpp - MATCHER(FooEq, "") { - return std::get<0>(arg).Equals(std::get<1>(arg)); - } - ... 
- EXPECT_THAT(actual_foos, Pointwise(FooEq(), expected_foos)); - ``` - -## Member Matchers - -| Matcher | Description | -| :------------------------------ | :----------------------------------------- | -| `Field(&class::field, m)` | `argument.field` (or `argument->field` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_. | -| `Field(field_name, &class::field, m)` | The same as the two-parameter version, but provides a better error message. | -| `Key(e)` | `argument.first` matches `e`, which can be either a value or a matcher. E.g. `Contains(Key(Le(5)))` can verify that a `map` contains a key `<= 5`. | -| `Pair(m1, m2)` | `argument` is an `std::pair` whose `first` field matches `m1` and `second` field matches `m2`. | -| `FieldsAre(m...)` | `argument` is a compatible object where each field matches piecewise with the matchers `m...`. A compatible object is any that supports the `std::tuple_size`+`get(obj)` protocol. In C++17 and up this also supports types compatible with structured bindings, like aggregates. | -| `Property(&class::property, m)` | `argument.property()` (or `argument->property()` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_. The method `property()` must take no argument and be declared as `const`. | -| `Property(property_name, &class::property, m)` | The same as the two-parameter version, but provides a better error message. - -**Notes:** - -* You can use `FieldsAre()` to match any type that supports structured - bindings, such as `std::tuple`, `std::pair`, `std::array`, and aggregate - types. For example: - - ```cpp - std::tuple my_tuple{7, "hello world"}; - EXPECT_THAT(my_tuple, FieldsAre(Ge(0), HasSubstr("hello"))); - - struct MyStruct { - int value = 42; - std::string greeting = "aloha"; - }; - MyStruct s; - EXPECT_THAT(s, FieldsAre(42, "aloha")); - ``` - -* Don't use `Property()` against member functions that you do not own, because - taking addresses of functions is fragile and generally not part of the - contract of the function. - -## Matching the Result of a Function, Functor, or Callback - -| Matcher | Description | -| :--------------- | :------------------------------------------------ | -| `ResultOf(f, m)` | `f(argument)` matches matcher `m`, where `f` is a function or functor. | -| `ResultOf(result_description, f, m)` | The same as the two-parameter version, but provides a better error message. - -## Pointer Matchers - -| Matcher | Description | -| :------------------------ | :---------------------------------------------- | -| `Address(m)` | the result of `std::addressof(argument)` matches `m`. | -| `Pointee(m)` | `argument` (either a smart pointer or a raw pointer) points to a value that matches matcher `m`. | -| `Pointer(m)` | `argument` (either a smart pointer or a raw pointer) contains a pointer that matches `m`. `m` will match against the raw pointer regardless of the type of `argument`. | -| `WhenDynamicCastTo(m)` | when `argument` is passed through `dynamic_cast()`, it matches matcher `m`. | - -## Multi-argument Matchers {#MultiArgMatchers} - -Technically, all matchers match a *single* value. A "multi-argument" matcher is -just one that matches a *tuple*. 
The following matchers can be used to match a -tuple `(x, y)`: - -Matcher | Description -:------ | :---------- -`Eq()` | `x == y` -`Ge()` | `x >= y` -`Gt()` | `x > y` -`Le()` | `x <= y` -`Lt()` | `x < y` -`Ne()` | `x != y` - -You can use the following selectors to pick a subset of the arguments (or -reorder them) to participate in the matching: - -| Matcher | Description | -| :------------------------- | :---------------------------------------------- | -| `AllArgs(m)` | Equivalent to `m`. Useful as syntactic sugar in `.With(AllArgs(m))`. | -| `Args(m)` | The tuple of the `k` selected (using 0-based indices) arguments matches `m`, e.g. `Args<1, 2>(Eq())`. | - -## Composite Matchers - -You can make a matcher from one or more other matchers: - -| Matcher | Description | -| :------------------------------- | :-------------------------------------- | -| `AllOf(m1, m2, ..., mn)` | `argument` matches all of the matchers `m1` to `mn`. | -| `AllOfArray({m0, m1, ..., mn})`, `AllOfArray(a_container)`, `AllOfArray(begin, end)`, `AllOfArray(array)`, or `AllOfArray(array, count)` | The same as `AllOf()` except that the matchers come from an initializer list, STL-style container, iterator range, or C-style array. | -| `AnyOf(m1, m2, ..., mn)` | `argument` matches at least one of the matchers `m1` to `mn`. | -| `AnyOfArray({m0, m1, ..., mn})`, `AnyOfArray(a_container)`, `AnyOfArray(begin, end)`, `AnyOfArray(array)`, or `AnyOfArray(array, count)` | The same as `AnyOf()` except that the matchers come from an initializer list, STL-style container, iterator range, or C-style array. | -| `Not(m)` | `argument` doesn't match matcher `m`. | -| `Conditional(cond, m1, m2)` | Matches matcher `m1` if `cond` evaluates to true, else matches `m2`.| - -## Adapters for Matchers - -| Matcher | Description | -| :---------------------- | :------------------------------------ | -| `MatcherCast(m)` | casts matcher `m` to type `Matcher`. | -| `SafeMatcherCast(m)` | [safely casts](../gmock_cook_book.md#SafeMatcherCast) matcher `m` to type `Matcher`. | -| `Truly(predicate)` | `predicate(argument)` returns something considered by C++ to be true, where `predicate` is a function or functor. | - -`AddressSatisfies(callback)` and `Truly(callback)` take ownership of `callback`, -which must be a permanent callback. - -## Using Matchers as Predicates {#MatchersAsPredicatesCheat} - -| Matcher | Description | -| :---------------------------- | :------------------------------------------ | -| `Matches(m)(value)` | evaluates to `true` if `value` matches `m`. You can use `Matches(m)` alone as a unary functor. | -| `ExplainMatchResult(m, value, result_listener)` | evaluates to `true` if `value` matches `m`, explaining the result to `result_listener`. | -| `Value(value, m)` | evaluates to `true` if `value` matches `m`. | - -## Defining Matchers - -| Macro | Description | -| :----------------------------------- | :------------------------------------ | -| `MATCHER(IsEven, "") { return (arg % 2) == 0; }` | Defines a matcher `IsEven()` to match an even number. | -| `MATCHER_P(IsDivisibleBy, n, "") { *result_listener << "where the remainder is " << (arg % n); return (arg % n) == 0; }` | Defines a matcher `IsDivisibleBy(n)` to match a number divisible by `n`. | -| `MATCHER_P2(IsBetween, a, b, absl::StrCat(negation ? "isn't" : "is", " between ", PrintToString(a), " and ", PrintToString(b))) { return a <= arg && arg <= b; }` | Defines a matcher `IsBetween(a, b)` to match a value in the range [`a`, `b`]. | - -**Notes:** - -1. 
The `MATCHER*` macros cannot be used inside a function or class. -2. The matcher body must be *purely functional* (i.e. it cannot have any side - effect, and the result must not depend on anything other than the value - being matched and the matcher parameters). -3. You can use `PrintToString(x)` to convert a value `x` of any type to a - string. -4. You can use `ExplainMatchResult()` in a custom matcher to wrap another - matcher, for example: - - ```cpp - MATCHER_P(NestedPropertyMatches, matcher, "") { - return ExplainMatchResult(matcher, arg.nested().property(), result_listener); - } - ``` diff --git a/3rdparty/googletest-1.13.0/docs/reference/mocking.md b/3rdparty/googletest-1.13.0/docs/reference/mocking.md deleted file mode 100644 index e414ffbd0dea39b9a97989f2939943a4a87362bd..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/reference/mocking.md +++ /dev/null @@ -1,589 +0,0 @@ -# Mocking Reference - -This page lists the facilities provided by GoogleTest for creating and working -with mock objects. To use them, include the header -`gmock/gmock.h`. - -## Macros {#macros} - -GoogleTest defines the following macros for working with mocks. - -### MOCK_METHOD {#MOCK_METHOD} - -`MOCK_METHOD(`*`return_type`*`,`*`method_name`*`, (`*`args...`*`));` \ -`MOCK_METHOD(`*`return_type`*`,`*`method_name`*`, (`*`args...`*`), -(`*`specs...`*`));` - -Defines a mock method *`method_name`* with arguments `(`*`args...`*`)` and -return type *`return_type`* within a mock class. - -The parameters of `MOCK_METHOD` mirror the method declaration. The optional -fourth parameter *`specs...`* is a comma-separated list of qualifiers. The -following qualifiers are accepted: - -| Qualifier | Meaning | -| -------------------------- | -------------------------------------------- | -| `const` | Makes the mocked method a `const` method. Required if overriding a `const` method. | -| `override` | Marks the method with `override`. Recommended if overriding a `virtual` method. | -| `noexcept` | Marks the method with `noexcept`. Required if overriding a `noexcept` method. | -| `Calltype(`*`calltype`*`)` | Sets the call type for the method, for example `Calltype(STDMETHODCALLTYPE)`. Useful on Windows. | -| `ref(`*`qualifier`*`)` | Marks the method with the given reference qualifier, for example `ref(&)` or `ref(&&)`. Required if overriding a method that has a reference qualifier. | - -Note that commas in arguments prevent `MOCK_METHOD` from parsing the arguments -correctly if they are not appropriately surrounded by parentheses. See the -following example: - -```cpp -class MyMock { - public: - // The following 2 lines will not compile due to commas in the arguments: - MOCK_METHOD(std::pair, GetPair, ()); // Error! - MOCK_METHOD(bool, CheckMap, (std::map, bool)); // Error! - - // One solution - wrap arguments that contain commas in parentheses: - MOCK_METHOD((std::pair), GetPair, ()); - MOCK_METHOD(bool, CheckMap, ((std::map), bool)); - - // Another solution - use type aliases: - using BoolAndInt = std::pair; - MOCK_METHOD(BoolAndInt, GetPair, ()); - using MapIntDouble = std::map; - MOCK_METHOD(bool, CheckMap, (MapIntDouble, bool)); -}; -``` - -`MOCK_METHOD` must be used in the `public:` section of a mock class definition, -regardless of whether the method being mocked is `public`, `protected`, or -`private` in the base class. 
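To put the pieces together, here is a minimal, self-contained sketch of a mock class; the `Turtle` interface and `MockTurtle` class follow the naming used in the GoogleMock introductory documentation, and the specific methods are illustrative only:

```cpp
#include "gmock/gmock.h"

// Interface to be mocked (illustrative).
class Turtle {
 public:
  virtual ~Turtle() = default;
  virtual void PenDown() = 0;
  virtual int GetX() const = 0;
  virtual void GoTo(int x, int y) = 0;
};

// Each MOCK_METHOD mirrors the signature of the method it overrides and
// lives in the public: section, regardless of the base method's access level.
class MockTurtle : public Turtle {
 public:
  MOCK_METHOD(void, PenDown, (), (override));
  MOCK_METHOD(int, GetX, (), (const, override));
  MOCK_METHOD(void, GoTo, (int x, int y), (override));
};
```

A test can then instantiate `MockTurtle` and set expectations on it with `EXPECT_CALL`, described next.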
- -### EXPECT_CALL {#EXPECT_CALL} - -`EXPECT_CALL(`*`mock_object`*`,`*`method_name`*`(`*`matchers...`*`))` - -Creates an [expectation](../gmock_for_dummies.md#setting-expectations) that the -method *`method_name`* of the object *`mock_object`* is called with arguments -that match the given matchers *`matchers...`*. `EXPECT_CALL` must precede any -code that exercises the mock object. - -The parameter *`matchers...`* is a comma-separated list of -[matchers](../gmock_for_dummies.md#matchers-what-arguments-do-we-expect) that -correspond to each argument of the method *`method_name`*. The expectation will -apply only to calls of *`method_name`* whose arguments match all of the -matchers. If `(`*`matchers...`*`)` is omitted, the expectation behaves as if -each argument's matcher were a [wildcard matcher (`_`)](matchers.md#wildcard). -See the [Matchers Reference](matchers.md) for a list of all built-in matchers. - -The following chainable clauses can be used to modify the expectation, and they -must be used in the following order: - -```cpp -EXPECT_CALL(mock_object, method_name(matchers...)) - .With(multi_argument_matcher) // Can be used at most once - .Times(cardinality) // Can be used at most once - .InSequence(sequences...) // Can be used any number of times - .After(expectations...) // Can be used any number of times - .WillOnce(action) // Can be used any number of times - .WillRepeatedly(action) // Can be used at most once - .RetiresOnSaturation(); // Can be used at most once -``` - -See details for each modifier clause below. - -#### With {#EXPECT_CALL.With} - -`.With(`*`multi_argument_matcher`*`)` - -Restricts the expectation to apply only to mock function calls whose arguments -as a whole match the multi-argument matcher *`multi_argument_matcher`*. - -GoogleTest passes all of the arguments as one tuple into the matcher. The -parameter *`multi_argument_matcher`* must thus be a matcher of type -`Matcher>`, where `A1, ..., An` are the types of the -function arguments. - -For example, the following code sets the expectation that -`my_mock.SetPosition()` is called with any two arguments, the first argument -being less than the second: - -```cpp -using ::testing::_; -using ::testing::Lt; -... -EXPECT_CALL(my_mock, SetPosition(_, _)) - .With(Lt()); -``` - -GoogleTest provides some built-in matchers for 2-tuples, including the `Lt()` -matcher above. See [Multi-argument Matchers](matchers.md#MultiArgMatchers). - -The `With` clause can be used at most once on an expectation and must be the -first clause. - -#### Times {#EXPECT_CALL.Times} - -`.Times(`*`cardinality`*`)` - -Specifies how many times the mock function call is expected. - -The parameter *`cardinality`* represents the number of expected calls and can be -one of the following, all defined in the `::testing` namespace: - -| Cardinality | Meaning | -| ------------------- | --------------------------------------------------- | -| `AnyNumber()` | The function can be called any number of times. | -| `AtLeast(n)` | The function call is expected at least *n* times. | -| `AtMost(n)` | The function call is expected at most *n* times. | -| `Between(m, n)` | The function call is expected between *m* and *n* times, inclusive. | -| `Exactly(n)` or `n` | The function call is expected exactly *n* times. If *n* is 0, the call should never happen. 
| - -If the `Times` clause is omitted, GoogleTest infers the cardinality as follows: - -* If neither [`WillOnce`](#EXPECT_CALL.WillOnce) nor - [`WillRepeatedly`](#EXPECT_CALL.WillRepeatedly) are specified, the inferred - cardinality is `Times(1)`. -* If there are *n* `WillOnce` clauses and no `WillRepeatedly` clause, where - *n* >= 1, the inferred cardinality is `Times(n)`. -* If there are *n* `WillOnce` clauses and one `WillRepeatedly` clause, where - *n* >= 0, the inferred cardinality is `Times(AtLeast(n))`. - -The `Times` clause can be used at most once on an expectation. - -#### InSequence {#EXPECT_CALL.InSequence} - -`.InSequence(`*`sequences...`*`)` - -Specifies that the mock function call is expected in a certain sequence. - -The parameter *`sequences...`* is any number of [`Sequence`](#Sequence) objects. -Expected calls assigned to the same sequence are expected to occur in the order -the expectations are declared. - -For example, the following code sets the expectation that the `Reset()` method -of `my_mock` is called before both `GetSize()` and `Describe()`, and `GetSize()` -and `Describe()` can occur in any order relative to each other: - -```cpp -using ::testing::Sequence; -Sequence s1, s2; -... -EXPECT_CALL(my_mock, Reset()) - .InSequence(s1, s2); -EXPECT_CALL(my_mock, GetSize()) - .InSequence(s1); -EXPECT_CALL(my_mock, Describe()) - .InSequence(s2); -``` - -The `InSequence` clause can be used any number of times on an expectation. - -See also the [`InSequence` class](#InSequence). - -#### After {#EXPECT_CALL.After} - -`.After(`*`expectations...`*`)` - -Specifies that the mock function call is expected to occur after one or more -other calls. - -The parameter *`expectations...`* can be up to five -[`Expectation`](#Expectation) or [`ExpectationSet`](#ExpectationSet) objects. -The mock function call is expected to occur after all of the given expectations. - -For example, the following code sets the expectation that the `Describe()` -method of `my_mock` is called only after both `InitX()` and `InitY()` have been -called. - -```cpp -using ::testing::Expectation; -... -Expectation init_x = EXPECT_CALL(my_mock, InitX()); -Expectation init_y = EXPECT_CALL(my_mock, InitY()); -EXPECT_CALL(my_mock, Describe()) - .After(init_x, init_y); -``` - -The `ExpectationSet` object is helpful when the number of prerequisites for an -expectation is large or variable, for example: - -```cpp -using ::testing::ExpectationSet; -... -ExpectationSet all_inits; -// Collect all expectations of InitElement() calls -for (int i = 0; i < element_count; i++) { - all_inits += EXPECT_CALL(my_mock, InitElement(i)); -} -EXPECT_CALL(my_mock, Describe()) - .After(all_inits); // Expect Describe() call after all InitElement() calls -``` - -The `After` clause can be used any number of times on an expectation. - -#### WillOnce {#EXPECT_CALL.WillOnce} - -`.WillOnce(`*`action`*`)` - -Specifies the mock function's actual behavior when invoked, for a single -matching function call. - -The parameter *`action`* represents the -[action](../gmock_for_dummies.md#actions-what-should-it-do) that the function -call will perform. See the [Actions Reference](actions.md) for a list of -built-in actions. - -The use of `WillOnce` implicitly sets a cardinality on the expectation when -`Times` is not specified. See [`Times`](#EXPECT_CALL.Times). - -Each matching function call will perform the next action in the order declared. 
-For example, the following code specifies that `my_mock.GetNumber()` is expected -to be called exactly 3 times and will return `1`, `2`, and `3` respectively on -the first, second, and third calls: - -```cpp -using ::testing::Return; -... -EXPECT_CALL(my_mock, GetNumber()) - .WillOnce(Return(1)) - .WillOnce(Return(2)) - .WillOnce(Return(3)); -``` - -The `WillOnce` clause can be used any number of times on an expectation. Unlike -`WillRepeatedly`, the action fed to each `WillOnce` call will be called at most -once, so may be a move-only type and/or have an `&&`-qualified call operator. - -#### WillRepeatedly {#EXPECT_CALL.WillRepeatedly} - -`.WillRepeatedly(`*`action`*`)` - -Specifies the mock function's actual behavior when invoked, for all subsequent -matching function calls. Takes effect after the actions specified in the -[`WillOnce`](#EXPECT_CALL.WillOnce) clauses, if any, have been performed. - -The parameter *`action`* represents the -[action](../gmock_for_dummies.md#actions-what-should-it-do) that the function -call will perform. See the [Actions Reference](actions.md) for a list of -built-in actions. - -The use of `WillRepeatedly` implicitly sets a cardinality on the expectation -when `Times` is not specified. See [`Times`](#EXPECT_CALL.Times). - -If any `WillOnce` clauses have been specified, matching function calls will -perform those actions before the action specified by `WillRepeatedly`. See the -following example: - -```cpp -using ::testing::Return; -... -EXPECT_CALL(my_mock, GetName()) - .WillRepeatedly(Return("John Doe")); // Return "John Doe" on all calls - -EXPECT_CALL(my_mock, GetNumber()) - .WillOnce(Return(42)) // Return 42 on the first call - .WillRepeatedly(Return(7)); // Return 7 on all subsequent calls -``` - -The `WillRepeatedly` clause can be used at most once on an expectation. - -#### RetiresOnSaturation {#EXPECT_CALL.RetiresOnSaturation} - -`.RetiresOnSaturation()` - -Indicates that the expectation will no longer be active after the expected -number of matching function calls has been reached. - -The `RetiresOnSaturation` clause is only meaningful for expectations with an -upper-bounded cardinality. The expectation will *retire* (no longer match any -function calls) after it has been *saturated* (the upper bound has been -reached). See the following example: - -```cpp -using ::testing::_; -using ::testing::AnyNumber; -... -EXPECT_CALL(my_mock, SetNumber(_)) // Expectation 1 - .Times(AnyNumber()); -EXPECT_CALL(my_mock, SetNumber(7)) // Expectation 2 - .Times(2) - .RetiresOnSaturation(); -``` - -In the above example, the first two calls to `my_mock.SetNumber(7)` match -expectation 2, which then becomes inactive and no longer matches any calls. A -third call to `my_mock.SetNumber(7)` would then match expectation 1. Without -`RetiresOnSaturation()` on expectation 2, a third call to `my_mock.SetNumber(7)` -would match expectation 2 again, producing a failure since the limit of 2 calls -was exceeded. - -The `RetiresOnSaturation` clause can be used at most once on an expectation and -must be the last clause. - -### ON_CALL {#ON_CALL} - -`ON_CALL(`*`mock_object`*`,`*`method_name`*`(`*`matchers...`*`))` - -Defines what happens when the method *`method_name`* of the object -*`mock_object`* is called with arguments that match the given matchers -*`matchers...`*. Requires a modifier clause to specify the method's behavior. -*Does not* set any expectations that the method will be called. 
- -The parameter *`matchers...`* is a comma-separated list of -[matchers](../gmock_for_dummies.md#matchers-what-arguments-do-we-expect) that -correspond to each argument of the method *`method_name`*. The `ON_CALL` -specification will apply only to calls of *`method_name`* whose arguments match -all of the matchers. If `(`*`matchers...`*`)` is omitted, the behavior is as if -each argument's matcher were a [wildcard matcher (`_`)](matchers.md#wildcard). -See the [Matchers Reference](matchers.md) for a list of all built-in matchers. - -The following chainable clauses can be used to set the method's behavior, and -they must be used in the following order: - -```cpp -ON_CALL(mock_object, method_name(matchers...)) - .With(multi_argument_matcher) // Can be used at most once - .WillByDefault(action); // Required -``` - -See details for each modifier clause below. - -#### With {#ON_CALL.With} - -`.With(`*`multi_argument_matcher`*`)` - -Restricts the specification to only mock function calls whose arguments as a -whole match the multi-argument matcher *`multi_argument_matcher`*. - -GoogleTest passes all of the arguments as one tuple into the matcher. The -parameter *`multi_argument_matcher`* must thus be a matcher of type -`Matcher>`, where `A1, ..., An` are the types of the -function arguments. - -For example, the following code sets the default behavior when -`my_mock.SetPosition()` is called with any two arguments, the first argument -being less than the second: - -```cpp -using ::testing::_; -using ::testing::Lt; -using ::testing::Return; -... -ON_CALL(my_mock, SetPosition(_, _)) - .With(Lt()) - .WillByDefault(Return(true)); -``` - -GoogleTest provides some built-in matchers for 2-tuples, including the `Lt()` -matcher above. See [Multi-argument Matchers](matchers.md#MultiArgMatchers). - -The `With` clause can be used at most once with each `ON_CALL` statement. - -#### WillByDefault {#ON_CALL.WillByDefault} - -`.WillByDefault(`*`action`*`)` - -Specifies the default behavior of a matching mock function call. - -The parameter *`action`* represents the -[action](../gmock_for_dummies.md#actions-what-should-it-do) that the function -call will perform. See the [Actions Reference](actions.md) for a list of -built-in actions. - -For example, the following code specifies that by default, a call to -`my_mock.Greet()` will return `"hello"`: - -```cpp -using ::testing::Return; -... -ON_CALL(my_mock, Greet()) - .WillByDefault(Return("hello")); -``` - -The action specified by `WillByDefault` is superseded by the actions specified -on a matching `EXPECT_CALL` statement, if any. See the -[`WillOnce`](#EXPECT_CALL.WillOnce) and -[`WillRepeatedly`](#EXPECT_CALL.WillRepeatedly) clauses of `EXPECT_CALL`. - -The `WillByDefault` clause must be used exactly once with each `ON_CALL` -statement. - -## Classes {#classes} - -GoogleTest defines the following classes for working with mocks. - -### DefaultValue {#DefaultValue} - -`::testing::DefaultValue` - -Allows a user to specify the default value for a type `T` that is both copyable -and publicly destructible (i.e. anything that can be used as a function return -type). For mock functions with a return type of `T`, this default value is -returned from function calls that do not specify an action. - -Provides the static methods `Set()`, `SetFactory()`, and `Clear()` to manage the -default value: - -```cpp -// Sets the default value to be returned. T must be copy constructible. -DefaultValue::Set(value); - -// Sets a factory. Will be invoked on demand. 
T must be move constructible. -T MakeT(); -DefaultValue::SetFactory(&MakeT); - -// Unsets the default value. -DefaultValue::Clear(); -``` - -### NiceMock {#NiceMock} - -`::testing::NiceMock` - -Represents a mock object that suppresses warnings on -[uninteresting calls](../gmock_cook_book.md#uninteresting-vs-unexpected). The -template parameter `T` is any mock class, except for another `NiceMock`, -`NaggyMock`, or `StrictMock`. - -Usage of `NiceMock` is analogous to usage of `T`. `NiceMock` is a subclass -of `T`, so it can be used wherever an object of type `T` is accepted. In -addition, `NiceMock` can be constructed with any arguments that a constructor -of `T` accepts. - -For example, the following code suppresses warnings on the mock `my_mock` of -type `MockClass` if a method other than `DoSomething()` is called: - -```cpp -using ::testing::NiceMock; -... -NiceMock my_mock("some", "args"); -EXPECT_CALL(my_mock, DoSomething()); -... code that uses my_mock ... -``` - -`NiceMock` only works for mock methods defined using the `MOCK_METHOD` macro -directly in the definition of class `T`. If a mock method is defined in a base -class of `T`, a warning might still be generated. - -`NiceMock` might not work correctly if the destructor of `T` is not virtual. - -### NaggyMock {#NaggyMock} - -`::testing::NaggyMock` - -Represents a mock object that generates warnings on -[uninteresting calls](../gmock_cook_book.md#uninteresting-vs-unexpected). The -template parameter `T` is any mock class, except for another `NiceMock`, -`NaggyMock`, or `StrictMock`. - -Usage of `NaggyMock` is analogous to usage of `T`. `NaggyMock` is a -subclass of `T`, so it can be used wherever an object of type `T` is accepted. -In addition, `NaggyMock` can be constructed with any arguments that a -constructor of `T` accepts. - -For example, the following code generates warnings on the mock `my_mock` of type -`MockClass` if a method other than `DoSomething()` is called: - -```cpp -using ::testing::NaggyMock; -... -NaggyMock my_mock("some", "args"); -EXPECT_CALL(my_mock, DoSomething()); -... code that uses my_mock ... -``` - -Mock objects of type `T` by default behave the same way as `NaggyMock`. - -### StrictMock {#StrictMock} - -`::testing::StrictMock` - -Represents a mock object that generates test failures on -[uninteresting calls](../gmock_cook_book.md#uninteresting-vs-unexpected). The -template parameter `T` is any mock class, except for another `NiceMock`, -`NaggyMock`, or `StrictMock`. - -Usage of `StrictMock` is analogous to usage of `T`. `StrictMock` is a -subclass of `T`, so it can be used wherever an object of type `T` is accepted. -In addition, `StrictMock` can be constructed with any arguments that a -constructor of `T` accepts. - -For example, the following code generates a test failure on the mock `my_mock` -of type `MockClass` if a method other than `DoSomething()` is called: - -```cpp -using ::testing::StrictMock; -... -StrictMock my_mock("some", "args"); -EXPECT_CALL(my_mock, DoSomething()); -... code that uses my_mock ... -``` - -`StrictMock` only works for mock methods defined using the `MOCK_METHOD` -macro directly in the definition of class `T`. If a mock method is defined in a -base class of `T`, a failure might not be generated. - -`StrictMock` might not work correctly if the destructor of `T` is not -virtual. - -### Sequence {#Sequence} - -`::testing::Sequence` - -Represents a chronological sequence of expectations. See the -[`InSequence`](#EXPECT_CALL.InSequence) clause of `EXPECT_CALL` for usage. 
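A single `Sequence` object can also order expectations that span more than one mock object. The following is a rough, self-contained sketch; the `MockWriter` and `MockLogger` classes and their methods are hypothetical:

```cpp
#include <string>

#include "gmock/gmock.h"
#include "gtest/gtest.h"

using ::testing::Sequence;

// Hypothetical collaborators, mocked directly for brevity.
class MockWriter {
 public:
  MOCK_METHOD(void, Open, (const std::string& path));
  MOCK_METHOD(void, Close, ());
};

class MockLogger {
 public:
  MOCK_METHOD(void, Info, (const std::string& message));
};

TEST(SequenceSketch, OrdersCallsAcrossMocks) {
  MockWriter writer;
  MockLogger logger;

  Sequence s;  // one sequence shared by expectations on different mocks
  EXPECT_CALL(writer, Open("out.txt")).InSequence(s);
  EXPECT_CALL(logger, Info("opened")).InSequence(s);
  EXPECT_CALL(writer, Close()).InSequence(s);

  // The calls must occur in exactly this order for the test to pass.
  writer.Open("out.txt");
  logger.Info("opened");
  writer.Close();
}
```

Assigning all three expectations to the same `Sequence` enforces a total order across both mocks; the `InSequence` helper class described next expresses the same idea more concisely for simple cases.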
- -### InSequence {#InSequence} - -`::testing::InSequence` - -An object of this type causes all expectations encountered in its scope to be -put in an anonymous sequence. - -This allows more convenient expression of multiple expectations in a single -sequence: - -```cpp -using ::testing::InSequence; -{ - InSequence seq; - - // The following are expected to occur in the order declared. - EXPECT_CALL(...); - EXPECT_CALL(...); - ... - EXPECT_CALL(...); -} -``` - -The name of the `InSequence` object does not matter. - -### Expectation {#Expectation} - -`::testing::Expectation` - -Represents a mock function call expectation as created by -[`EXPECT_CALL`](#EXPECT_CALL): - -```cpp -using ::testing::Expectation; -Expectation my_expectation = EXPECT_CALL(...); -``` - -Useful for specifying sequences of expectations; see the -[`After`](#EXPECT_CALL.After) clause of `EXPECT_CALL`. - -### ExpectationSet {#ExpectationSet} - -`::testing::ExpectationSet` - -Represents a set of mock function call expectations. - -Use the `+=` operator to add [`Expectation`](#Expectation) objects to the set: - -```cpp -using ::testing::ExpectationSet; -ExpectationSet my_expectations; -my_expectations += EXPECT_CALL(...); -``` - -Useful for specifying sequences of expectations; see the -[`After`](#EXPECT_CALL.After) clause of `EXPECT_CALL`. diff --git a/3rdparty/googletest-1.13.0/docs/reference/testing.md b/3rdparty/googletest-1.13.0/docs/reference/testing.md deleted file mode 100644 index 62cdcc1c6555542856533412608e2173e7ea9a0d..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/reference/testing.md +++ /dev/null @@ -1,1431 +0,0 @@ -# Testing Reference - - - -This page lists the facilities provided by GoogleTest for writing test programs. -To use them, include the header `gtest/gtest.h`. - -## Macros - -GoogleTest defines the following macros for writing tests. - -### TEST {#TEST} - -
-TEST(TestSuiteName, TestName) {
-  ... statements ...
-}
-
- -Defines an individual test named *`TestName`* in the test suite -*`TestSuiteName`*, consisting of the given statements. - -Both arguments *`TestSuiteName`* and *`TestName`* must be valid C++ identifiers -and must not contain underscores (`_`). Tests in different test suites can have -the same individual name. - -The statements within the test body can be any code under test. -[Assertions](assertions.md) used within the test body determine the outcome of -the test. - -### TEST_F {#TEST_F} - -
-TEST_F(TestFixtureName, TestName) {
-  ... statements ...
-}
-
- -Defines an individual test named *`TestName`* that uses the test fixture class -*`TestFixtureName`*. The test suite name is *`TestFixtureName`*. - -Both arguments *`TestFixtureName`* and *`TestName`* must be valid C++ -identifiers and must not contain underscores (`_`). *`TestFixtureName`* must be -the name of a test fixture classβ€”see -[Test Fixtures](../primer.md#same-data-multiple-tests). - -The statements within the test body can be any code under test. -[Assertions](assertions.md) used within the test body determine the outcome of -the test. - -### TEST_P {#TEST_P} - -
-TEST_P(TestFixtureName, TestName) {
-  ... statements ...
-}
-
- -Defines an individual value-parameterized test named *`TestName`* that uses the -test fixture class *`TestFixtureName`*. The test suite name is -*`TestFixtureName`*. - -Both arguments *`TestFixtureName`* and *`TestName`* must be valid C++ -identifiers and must not contain underscores (`_`). *`TestFixtureName`* must be -the name of a value-parameterized test fixture classβ€”see -[Value-Parameterized Tests](../advanced.md#value-parameterized-tests). - -The statements within the test body can be any code under test. Within the test -body, the test parameter can be accessed with the `GetParam()` function (see -[`WithParamInterface`](#WithParamInterface)). For example: - -```cpp -TEST_P(MyTestSuite, DoesSomething) { - ... - EXPECT_TRUE(DoSomething(GetParam())); - ... -} -``` - -[Assertions](assertions.md) used within the test body determine the outcome of -the test. - -See also [`INSTANTIATE_TEST_SUITE_P`](#INSTANTIATE_TEST_SUITE_P). - -### INSTANTIATE_TEST_SUITE_P {#INSTANTIATE_TEST_SUITE_P} - -`INSTANTIATE_TEST_SUITE_P(`*`InstantiationName`*`,`*`TestSuiteName`*`,`*`param_generator`*`)` -\ -`INSTANTIATE_TEST_SUITE_P(`*`InstantiationName`*`,`*`TestSuiteName`*`,`*`param_generator`*`,`*`name_generator`*`)` - -Instantiates the value-parameterized test suite *`TestSuiteName`* (defined with -[`TEST_P`](#TEST_P)). - -The argument *`InstantiationName`* is a unique name for the instantiation of the -test suite, to distinguish between multiple instantiations. In test output, the -instantiation name is added as a prefix to the test suite name -*`TestSuiteName`*. - -The argument *`param_generator`* is one of the following GoogleTest-provided -functions that generate the test parameters, all defined in the `::testing` -namespace: - - - -| Parameter Generator | Behavior | -| ------------------- | ---------------------------------------------------- | -| `Range(begin, end [, step])` | Yields values `{begin, begin+step, begin+step+step, ...}`. The values do not include `end`. `step` defaults to 1. | -| `Values(v1, v2, ..., vN)` | Yields values `{v1, v2, ..., vN}`. | -| `ValuesIn(container)` or `ValuesIn(begin,end)` | Yields values from a C-style array, an STL-style container, or an iterator range `[begin, end)`. | -| `Bool()` | Yields sequence `{false, true}`. | -| `Combine(g1, g2, ..., gN)` | Yields as `std::tuple` *n*-tuples all combinations (Cartesian product) of the values generated by the given *n* generators `g1`, `g2`, ..., `gN`. | -| `ConvertGenerator(g)` | Yields values generated by generator `g`, `static_cast` to `T`. | -The optional last argument *`name_generator`* is a function or functor that -generates custom test name suffixes based on the test parameters. The function -must accept an argument of type -[`TestParamInfo`](#TestParamInfo) and return a `std::string`. -The test name suffix can only contain alphanumeric characters and underscores. -GoogleTest provides [`PrintToStringParamName`](#PrintToStringParamName), or a -custom function can be used for more control: - -```cpp -INSTANTIATE_TEST_SUITE_P( - MyInstantiation, MyTestSuite, - ::testing::Values(...), - [](const ::testing::TestParamInfo& info) { - // Can use info.param here to generate the test suffix - std::string name = ... - return name; - }); -``` - -For more information, see -[Value-Parameterized Tests](../advanced.md#value-parameterized-tests). - -See also -[`GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST`](#GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST). 
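Putting `TEST_P` and `INSTANTIATE_TEST_SUITE_P` together, a complete value-parameterized suite might look like the following sketch; the `IsEven` function and the fixture name are hypothetical:

```cpp
#include "gtest/gtest.h"

// Hypothetical function under test.
bool IsEven(int n) { return n % 2 == 0; }

// The fixture derives from TestWithParam<T> so GetParam() returns an int.
class IsEvenTest : public ::testing::TestWithParam<int> {};

TEST_P(IsEvenTest, AcceptsEvenNumbers) {
  EXPECT_TRUE(IsEven(GetParam()));
}

// Instantiate the suite once per parameter value; the reported test names
// look like EvenValues/IsEvenTest.AcceptsEvenNumbers/0, /1, and so on.
INSTANTIATE_TEST_SUITE_P(EvenValues, IsEvenTest,
                         ::testing::Values(0, 2, 42, -8));
```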
- -### TYPED_TEST_SUITE {#TYPED_TEST_SUITE} - -`TYPED_TEST_SUITE(`*`TestFixtureName`*`,`*`Types`*`)` - -Defines a typed test suite based on the test fixture *`TestFixtureName`*. The -test suite name is *`TestFixtureName`*. - -The argument *`TestFixtureName`* is a fixture class template, parameterized by a -type, for example: - -```cpp -template -class MyFixture : public ::testing::Test { - public: - ... - using List = std::list; - static T shared_; - T value_; -}; -``` - -The argument *`Types`* is a [`Types`](#Types) object representing the list of -types to run the tests on, for example: - -```cpp -using MyTypes = ::testing::Types; -TYPED_TEST_SUITE(MyFixture, MyTypes); -``` - -The type alias (`using` or `typedef`) is necessary for the `TYPED_TEST_SUITE` -macro to parse correctly. - -See also [`TYPED_TEST`](#TYPED_TEST) and -[Typed Tests](../advanced.md#typed-tests) for more information. - -### TYPED_TEST {#TYPED_TEST} - -
-TYPED_TEST(TestSuiteName, TestName) {
-  ... statements ...
-}
-
- -Defines an individual typed test named *`TestName`* in the typed test suite -*`TestSuiteName`*. The test suite must be defined with -[`TYPED_TEST_SUITE`](#TYPED_TEST_SUITE). - -Within the test body, the special name `TypeParam` refers to the type parameter, -and `TestFixture` refers to the fixture class. See the following example: - -```cpp -TYPED_TEST(MyFixture, Example) { - // Inside a test, refer to the special name TypeParam to get the type - // parameter. Since we are inside a derived class template, C++ requires - // us to visit the members of MyFixture via 'this'. - TypeParam n = this->value_; - - // To visit static members of the fixture, add the 'TestFixture::' - // prefix. - n += TestFixture::shared_; - - // To refer to typedefs in the fixture, add the 'typename TestFixture::' - // prefix. The 'typename' is required to satisfy the compiler. - typename TestFixture::List values; - - values.push_back(n); - ... -} -``` - -For more information, see [Typed Tests](../advanced.md#typed-tests). - -### TYPED_TEST_SUITE_P {#TYPED_TEST_SUITE_P} - -`TYPED_TEST_SUITE_P(`*`TestFixtureName`*`)` - -Defines a type-parameterized test suite based on the test fixture -*`TestFixtureName`*. The test suite name is *`TestFixtureName`*. - -The argument *`TestFixtureName`* is a fixture class template, parameterized by a -type. See [`TYPED_TEST_SUITE`](#TYPED_TEST_SUITE) for an example. - -See also [`TYPED_TEST_P`](#TYPED_TEST_P) and -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests) for more -information. - -### TYPED_TEST_P {#TYPED_TEST_P} - -
-<pre>
-TYPED_TEST_P(TestSuiteName, TestName) {
-  ... statements ...
-}
-</pre>
- -Defines an individual type-parameterized test named *`TestName`* in the -type-parameterized test suite *`TestSuiteName`*. The test suite must be defined -with [`TYPED_TEST_SUITE_P`](#TYPED_TEST_SUITE_P). - -Within the test body, the special name `TypeParam` refers to the type parameter, -and `TestFixture` refers to the fixture class. See [`TYPED_TEST`](#TYPED_TEST) -for an example. - -See also [`REGISTER_TYPED_TEST_SUITE_P`](#REGISTER_TYPED_TEST_SUITE_P) and -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests) for more -information. - -### REGISTER_TYPED_TEST_SUITE_P {#REGISTER_TYPED_TEST_SUITE_P} - -`REGISTER_TYPED_TEST_SUITE_P(`*`TestSuiteName`*`,`*`TestNames...`*`)` - -Registers the type-parameterized tests *`TestNames...`* of the test suite -*`TestSuiteName`*. The test suite and tests must be defined with -[`TYPED_TEST_SUITE_P`](#TYPED_TEST_SUITE_P) and [`TYPED_TEST_P`](#TYPED_TEST_P). - -For example: - -```cpp -// Define the test suite and tests. -TYPED_TEST_SUITE_P(MyFixture); -TYPED_TEST_P(MyFixture, HasPropertyA) { ... } -TYPED_TEST_P(MyFixture, HasPropertyB) { ... } - -// Register the tests in the test suite. -REGISTER_TYPED_TEST_SUITE_P(MyFixture, HasPropertyA, HasPropertyB); -``` - -See also [`INSTANTIATE_TYPED_TEST_SUITE_P`](#INSTANTIATE_TYPED_TEST_SUITE_P) and -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests) for more -information. - -### INSTANTIATE_TYPED_TEST_SUITE_P {#INSTANTIATE_TYPED_TEST_SUITE_P} - -`INSTANTIATE_TYPED_TEST_SUITE_P(`*`InstantiationName`*`,`*`TestSuiteName`*`,`*`Types`*`)` - -Instantiates the type-parameterized test suite *`TestSuiteName`*. The test suite -must be registered with -[`REGISTER_TYPED_TEST_SUITE_P`](#REGISTER_TYPED_TEST_SUITE_P). - -The argument *`InstantiationName`* is a unique name for the instantiation of the -test suite, to distinguish between multiple instantiations. In test output, the -instantiation name is added as a prefix to the test suite name -*`TestSuiteName`*. - -The argument *`Types`* is a [`Types`](#Types) object representing the list of -types to run the tests on, for example: - -```cpp -using MyTypes = ::testing::Types; -INSTANTIATE_TYPED_TEST_SUITE_P(MyInstantiation, MyFixture, MyTypes); -``` - -The type alias (`using` or `typedef`) is necessary for the -`INSTANTIATE_TYPED_TEST_SUITE_P` macro to parse correctly. - -For more information, see -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests). - -### FRIEND_TEST {#FRIEND_TEST} - -`FRIEND_TEST(`*`TestSuiteName`*`,`*`TestName`*`)` - -Within a class body, declares an individual test as a friend of the class, -enabling the test to access private class members. - -If the class is defined in a namespace, then in order to be friends of the -class, test fixtures and tests must be defined in the exact same namespace, -without inline or anonymous namespaces. - -For example, if the class definition looks like the following: - -```cpp -namespace my_namespace { - -class MyClass { - friend class MyClassTest; - FRIEND_TEST(MyClassTest, HasPropertyA); - FRIEND_TEST(MyClassTest, HasPropertyB); - ... definition of class MyClass ... -}; - -} // namespace my_namespace -``` - -Then the test code should look like: - -```cpp -namespace my_namespace { - -class MyClassTest : public ::testing::Test { - ... -}; - -TEST_F(MyClassTest, HasPropertyA) { ... } -TEST_F(MyClassTest, HasPropertyB) { ... } - -} // namespace my_namespace -``` - -See [Testing Private Code](../advanced.md#testing-private-code) for more -information. 
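As a combined sketch of the type-parameterized workflow described above (`TYPED_TEST_SUITE_P`, `TYPED_TEST_P`, `REGISTER_TYPED_TEST_SUITE_P`, and `INSTANTIATE_TYPED_TEST_SUITE_P`); the fixture `ContainerTest` and the chosen container types are hypothetical:

```cpp
#include <list>
#include <vector>

#include "gtest/gtest.h"

// Hypothetical fixture template; any state shared by the tests would live here.
template <typename T>
class ContainerTest : public ::testing::Test {};

TYPED_TEST_SUITE_P(ContainerTest);

TYPED_TEST_P(ContainerTest, StartsEmpty) {
  TypeParam container;
  EXPECT_TRUE(container.empty());
}

TYPED_TEST_P(ContainerTest, GrowsWhenElementsAreAdded) {
  TypeParam container;
  container.push_back(typename TypeParam::value_type());
  EXPECT_EQ(container.size(), 1u);
}

// Every TYPED_TEST_P must be listed here before the suite is instantiated.
REGISTER_TYPED_TEST_SUITE_P(ContainerTest, StartsEmpty,
                            GrowsWhenElementsAreAdded);

// The type list must be introduced through an alias for the macro to parse.
using ContainerTypes = ::testing::Types<std::vector<int>, std::list<int>>;
INSTANTIATE_TYPED_TEST_SUITE_P(StdContainers, ContainerTest, ContainerTypes);
```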
- -### SCOPED_TRACE {#SCOPED_TRACE} - -`SCOPED_TRACE(`*`message`*`)` - -Causes the current file name, line number, and the given message *`message`* to -be added to the failure message for each assertion failure that occurs in the -scope. - -For more information, see -[Adding Traces to Assertions](../advanced.md#adding-traces-to-assertions). - -See also the [`ScopedTrace` class](#ScopedTrace). - -### GTEST_SKIP {#GTEST_SKIP} - -`GTEST_SKIP()` - -Prevents further test execution at runtime. - -Can be used in individual test cases or in the `SetUp()` methods of test -environments or test fixtures (classes derived from the -[`Environment`](#Environment) or [`Test`](#Test) classes). If used in a global -test environment `SetUp()` method, it skips all tests in the test program. If -used in a test fixture `SetUp()` method, it skips all tests in the corresponding -test suite. - -Similar to assertions, `GTEST_SKIP` allows streaming a custom message into it. - -See [Skipping Test Execution](../advanced.md#skipping-test-execution) for more -information. - -### GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST {#GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST} - -`GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(`*`TestSuiteName`*`)` - -Allows the value-parameterized test suite *`TestSuiteName`* to be -uninstantiated. - -By default, every [`TEST_P`](#TEST_P) call without a corresponding -[`INSTANTIATE_TEST_SUITE_P`](#INSTANTIATE_TEST_SUITE_P) call causes a failing -test in the test suite `GoogleTestVerification`. -`GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST` suppresses this failure for the -given test suite. - -## Classes and types - -GoogleTest defines the following classes and types to help with writing tests. - -### AssertionResult {#AssertionResult} - -`::testing::AssertionResult` - -A class for indicating whether an assertion was successful. - -When the assertion wasn't successful, the `AssertionResult` object stores a -non-empty failure message that can be retrieved with the object's `message()` -method. - -To create an instance of this class, use one of the factory functions -[`AssertionSuccess()`](#AssertionSuccess) or -[`AssertionFailure()`](#AssertionFailure). - -### AssertionException {#AssertionException} - -`::testing::AssertionException` - -Exception which can be thrown from -[`TestEventListener::OnTestPartResult`](#TestEventListener::OnTestPartResult). - -### EmptyTestEventListener {#EmptyTestEventListener} - -`::testing::EmptyTestEventListener` - -Provides an empty implementation of all methods in the -[`TestEventListener`](#TestEventListener) interface, such that a subclass only -needs to override the methods it cares about. - -### Environment {#Environment} - -`::testing::Environment` - -Represents a global test environment. See -[Global Set-Up and Tear-Down](../advanced.md#global-set-up-and-tear-down). - -#### Protected Methods {#Environment-protected} - -##### SetUp {#Environment::SetUp} - -`virtual void Environment::SetUp()` - -Override this to define how to set up the environment. - -##### TearDown {#Environment::TearDown} - -`virtual void Environment::TearDown()` - -Override this to define how to tear down the environment. - -### ScopedTrace {#ScopedTrace} - -`::testing::ScopedTrace` - -An instance of this class causes a trace to be included in every test failure -message generated by code in the scope of the lifetime of the `ScopedTrace` -instance. The effect is undone with the destruction of the instance. 
- -The `ScopedTrace` constructor has the following form: - -```cpp -template -ScopedTrace(const char* file, int line, const T& message) -``` - -Example usage: - -```cpp -::testing::ScopedTrace trace("file.cc", 123, "message"); -``` - -The resulting trace includes the given source file path and line number, and the -given message. The `message` argument can be anything streamable to -`std::ostream`. - -See also [`SCOPED_TRACE`](#SCOPED_TRACE). - -### Test {#Test} - -`::testing::Test` - -The abstract class that all tests inherit from. `Test` is not copyable. - -#### Public Methods {#Test-public} - -##### SetUpTestSuite {#Test::SetUpTestSuite} - -`static void Test::SetUpTestSuite()` - -Performs shared setup for all tests in the test suite. GoogleTest calls -`SetUpTestSuite()` before running the first test in the test suite. - -##### TearDownTestSuite {#Test::TearDownTestSuite} - -`static void Test::TearDownTestSuite()` - -Performs shared teardown for all tests in the test suite. GoogleTest calls -`TearDownTestSuite()` after running the last test in the test suite. - -##### HasFatalFailure {#Test::HasFatalFailure} - -`static bool Test::HasFatalFailure()` - -Returns true if and only if the current test has a fatal failure. - -##### HasNonfatalFailure {#Test::HasNonfatalFailure} - -`static bool Test::HasNonfatalFailure()` - -Returns true if and only if the current test has a nonfatal failure. - -##### HasFailure {#Test::HasFailure} - -`static bool Test::HasFailure()` - -Returns true if and only if the current test has any failure, either fatal or -nonfatal. - -##### IsSkipped {#Test::IsSkipped} - -`static bool Test::IsSkipped()` - -Returns true if and only if the current test was skipped. - -##### RecordProperty {#Test::RecordProperty} - -`static void Test::RecordProperty(const std::string& key, const std::string& -value)` \ -`static void Test::RecordProperty(const std::string& key, int value)` - -Logs a property for the current test, test suite, or entire invocation of the -test program. Only the last value for a given key is logged. - -The key must be a valid XML attribute name, and cannot conflict with the ones -already used by GoogleTest (`name`, `file`, `line`, `status`, `time`, -`classname`, `type_param`, and `value_param`). - -`RecordProperty` is `public static` so it can be called from utility functions -that are not members of the test fixture. - -Calls to `RecordProperty` made during the lifespan of the test (from the moment -its constructor starts to the moment its destructor finishes) are output in XML -as attributes of the `` element. Properties recorded from a fixture's -`SetUpTestSuite` or `TearDownTestSuite` methods are logged as attributes of the -corresponding `` element. Calls to `RecordProperty` made in the -global context (before or after invocation of `RUN_ALL_TESTS` or from the -`SetUp`/`TearDown` methods of registered `Environment` objects) are output as -attributes of the `` element. - -#### Protected Methods {#Test-protected} - -##### SetUp {#Test::SetUp} - -`virtual void Test::SetUp()` - -Override this to perform test fixture setup. GoogleTest calls `SetUp()` before -running each individual test. - -##### TearDown {#Test::TearDown} - -`virtual void Test::TearDown()` - -Override this to perform test fixture teardown. GoogleTest calls `TearDown()` -after running each individual test. - -### TestWithParam {#TestWithParam} - -`::testing::TestWithParam` - -A convenience class which inherits from both [`Test`](#Test) and -[`WithParamInterface`](#WithParamInterface). 
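A minimal sketch combining the per-suite and per-test hooks above; the fixture `LifecycleTest` and its counter are hypothetical:

```cpp
#include "gtest/gtest.h"

class LifecycleTest : public ::testing::Test {
 protected:
  // Called once before the first test in the suite runs.
  static void SetUpTestSuite() { tests_started_ = 0; }

  // Called once after the last test in the suite runs.
  static void TearDownTestSuite() { tests_started_ = 0; }

  // Called before each individual test.
  void SetUp() override { ++tests_started_; }

  // Called after each individual test; the property is attached to the
  // current test's entry in the XML report.
  void TearDown() override {
    RecordProperty("tests_started_so_far", tests_started_);
  }

  static int tests_started_;
};

int LifecycleTest::tests_started_ = 0;

TEST_F(LifecycleTest, SeesFixtureState) { EXPECT_GE(tests_started_, 1); }
```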
- -### TestSuite {#TestSuite} - -Represents a test suite. `TestSuite` is not copyable. - -#### Public Methods {#TestSuite-public} - -##### name {#TestSuite::name} - -`const char* TestSuite::name() const` - -Gets the name of the test suite. - -##### type_param {#TestSuite::type_param} - -`const char* TestSuite::type_param() const` - -Returns the name of the parameter type, or `NULL` if this is not a typed or -type-parameterized test suite. See [Typed Tests](../advanced.md#typed-tests) and -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests). - -##### should_run {#TestSuite::should_run} - -`bool TestSuite::should_run() const` - -Returns true if any test in this test suite should run. - -##### successful_test_count {#TestSuite::successful_test_count} - -`int TestSuite::successful_test_count() const` - -Gets the number of successful tests in this test suite. - -##### skipped_test_count {#TestSuite::skipped_test_count} - -`int TestSuite::skipped_test_count() const` - -Gets the number of skipped tests in this test suite. - -##### failed_test_count {#TestSuite::failed_test_count} - -`int TestSuite::failed_test_count() const` - -Gets the number of failed tests in this test suite. - -##### reportable_disabled_test_count {#TestSuite::reportable_disabled_test_count} - -`int TestSuite::reportable_disabled_test_count() const` - -Gets the number of disabled tests that will be reported in the XML report. - -##### disabled_test_count {#TestSuite::disabled_test_count} - -`int TestSuite::disabled_test_count() const` - -Gets the number of disabled tests in this test suite. - -##### reportable_test_count {#TestSuite::reportable_test_count} - -`int TestSuite::reportable_test_count() const` - -Gets the number of tests to be printed in the XML report. - -##### test_to_run_count {#TestSuite::test_to_run_count} - -`int TestSuite::test_to_run_count() const` - -Get the number of tests in this test suite that should run. - -##### total_test_count {#TestSuite::total_test_count} - -`int TestSuite::total_test_count() const` - -Gets the number of all tests in this test suite. - -##### Passed {#TestSuite::Passed} - -`bool TestSuite::Passed() const` - -Returns true if and only if the test suite passed. - -##### Failed {#TestSuite::Failed} - -`bool TestSuite::Failed() const` - -Returns true if and only if the test suite failed. - -##### elapsed_time {#TestSuite::elapsed_time} - -`TimeInMillis TestSuite::elapsed_time() const` - -Returns the elapsed time, in milliseconds. - -##### start_timestamp {#TestSuite::start_timestamp} - -`TimeInMillis TestSuite::start_timestamp() const` - -Gets the time of the test suite start, in ms from the start of the UNIX epoch. - -##### GetTestInfo {#TestSuite::GetTestInfo} - -`const TestInfo* TestSuite::GetTestInfo(int i) const` - -Returns the [`TestInfo`](#TestInfo) for the `i`-th test among all the tests. `i` -can range from 0 to `total_test_count() - 1`. If `i` is not in that range, -returns `NULL`. - -##### ad_hoc_test_result {#TestSuite::ad_hoc_test_result} - -`const TestResult& TestSuite::ad_hoc_test_result() const` - -Returns the [`TestResult`](#TestResult) that holds test properties recorded -during execution of `SetUpTestSuite` and `TearDownTestSuite`. - -### TestInfo {#TestInfo} - -`::testing::TestInfo` - -Stores information about a test. - -#### Public Methods {#TestInfo-public} - -##### test_suite_name {#TestInfo::test_suite_name} - -`const char* TestInfo::test_suite_name() const` - -Returns the test suite name. 
- -##### name {#TestInfo::name} - -`const char* TestInfo::name() const` - -Returns the test name. - -##### type_param {#TestInfo::type_param} - -`const char* TestInfo::type_param() const` - -Returns the name of the parameter type, or `NULL` if this is not a typed or -type-parameterized test. See [Typed Tests](../advanced.md#typed-tests) and -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests). - -##### value_param {#TestInfo::value_param} - -`const char* TestInfo::value_param() const` - -Returns the text representation of the value parameter, or `NULL` if this is not -a value-parameterized test. See -[Value-Parameterized Tests](../advanced.md#value-parameterized-tests). - -##### file {#TestInfo::file} - -`const char* TestInfo::file() const` - -Returns the file name where this test is defined. - -##### line {#TestInfo::line} - -`int TestInfo::line() const` - -Returns the line where this test is defined. - -##### is_in_another_shard {#TestInfo::is_in_another_shard} - -`bool TestInfo::is_in_another_shard() const` - -Returns true if this test should not be run because it's in another shard. - -##### should_run {#TestInfo::should_run} - -`bool TestInfo::should_run() const` - -Returns true if this test should run, that is if the test is not disabled (or it -is disabled but the `also_run_disabled_tests` flag has been specified) and its -full name matches the user-specified filter. - -GoogleTest allows the user to filter the tests by their full names. Only the -tests that match the filter will run. See -[Running a Subset of the Tests](../advanced.md#running-a-subset-of-the-tests) -for more information. - -##### is_reportable {#TestInfo::is_reportable} - -`bool TestInfo::is_reportable() const` - -Returns true if and only if this test will appear in the XML report. - -##### result {#TestInfo::result} - -`const TestResult* TestInfo::result() const` - -Returns the result of the test. See [`TestResult`](#TestResult). - -### TestParamInfo {#TestParamInfo} - -`::testing::TestParamInfo` - -Describes a parameter to a value-parameterized test. The type `T` is the type of -the parameter. - -Contains the fields `param` and `index` which hold the value of the parameter -and its integer index respectively. - -### UnitTest {#UnitTest} - -`::testing::UnitTest` - -This class contains information about the test program. - -`UnitTest` is a singleton class. The only instance is created when -`UnitTest::GetInstance()` is first called. This instance is never deleted. - -`UnitTest` is not copyable. - -#### Public Methods {#UnitTest-public} - -##### GetInstance {#UnitTest::GetInstance} - -`static UnitTest* UnitTest::GetInstance()` - -Gets the singleton `UnitTest` object. The first time this method is called, a -`UnitTest` object is constructed and returned. Consecutive calls will return the -same object. - -##### original_working_dir {#UnitTest::original_working_dir} - -`const char* UnitTest::original_working_dir() const` - -Returns the working directory when the first [`TEST()`](#TEST) or -[`TEST_F()`](#TEST_F) was executed. The `UnitTest` object owns the string. - -##### current_test_suite {#UnitTest::current_test_suite} - -`const TestSuite* UnitTest::current_test_suite() const` - -Returns the [`TestSuite`](#TestSuite) object for the test that's currently -running, or `NULL` if no test is running. 
- -##### current_test_info {#UnitTest::current_test_info} - -`const TestInfo* UnitTest::current_test_info() const` - -Returns the [`TestInfo`](#TestInfo) object for the test that's currently -running, or `NULL` if no test is running. - -##### random_seed {#UnitTest::random_seed} - -`int UnitTest::random_seed() const` - -Returns the random seed used at the start of the current test run. - -##### successful_test_suite_count {#UnitTest::successful_test_suite_count} - -`int UnitTest::successful_test_suite_count() const` - -Gets the number of successful test suites. - -##### failed_test_suite_count {#UnitTest::failed_test_suite_count} - -`int UnitTest::failed_test_suite_count() const` - -Gets the number of failed test suites. - -##### total_test_suite_count {#UnitTest::total_test_suite_count} - -`int UnitTest::total_test_suite_count() const` - -Gets the number of all test suites. - -##### test_suite_to_run_count {#UnitTest::test_suite_to_run_count} - -`int UnitTest::test_suite_to_run_count() const` - -Gets the number of all test suites that contain at least one test that should -run. - -##### successful_test_count {#UnitTest::successful_test_count} - -`int UnitTest::successful_test_count() const` - -Gets the number of successful tests. - -##### skipped_test_count {#UnitTest::skipped_test_count} - -`int UnitTest::skipped_test_count() const` - -Gets the number of skipped tests. - -##### failed_test_count {#UnitTest::failed_test_count} - -`int UnitTest::failed_test_count() const` - -Gets the number of failed tests. - -##### reportable_disabled_test_count {#UnitTest::reportable_disabled_test_count} - -`int UnitTest::reportable_disabled_test_count() const` - -Gets the number of disabled tests that will be reported in the XML report. - -##### disabled_test_count {#UnitTest::disabled_test_count} - -`int UnitTest::disabled_test_count() const` - -Gets the number of disabled tests. - -##### reportable_test_count {#UnitTest::reportable_test_count} - -`int UnitTest::reportable_test_count() const` - -Gets the number of tests to be printed in the XML report. - -##### total_test_count {#UnitTest::total_test_count} - -`int UnitTest::total_test_count() const` - -Gets the number of all tests. - -##### test_to_run_count {#UnitTest::test_to_run_count} - -`int UnitTest::test_to_run_count() const` - -Gets the number of tests that should run. - -##### start_timestamp {#UnitTest::start_timestamp} - -`TimeInMillis UnitTest::start_timestamp() const` - -Gets the time of the test program start, in ms from the start of the UNIX epoch. - -##### elapsed_time {#UnitTest::elapsed_time} - -`TimeInMillis UnitTest::elapsed_time() const` - -Gets the elapsed time, in milliseconds. - -##### Passed {#UnitTest::Passed} - -`bool UnitTest::Passed() const` - -Returns true if and only if the unit test passed (i.e. all test suites passed). - -##### Failed {#UnitTest::Failed} - -`bool UnitTest::Failed() const` - -Returns true if and only if the unit test failed (i.e. some test suite failed or -something outside of all tests failed). - -##### GetTestSuite {#UnitTest::GetTestSuite} - -`const TestSuite* UnitTest::GetTestSuite(int i) const` - -Gets the [`TestSuite`](#TestSuite) object for the `i`-th test suite among all -the test suites. `i` can range from 0 to `total_test_suite_count() - 1`. If `i` -is not in that range, returns `NULL`. 
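For illustration, a hypothetical helper that walks the singleton's test suites with `GetTestSuite` and the counters above after a run has finished:

```cpp
#include <cstdio>

#include "gtest/gtest.h"

// Hypothetical helper: call after RUN_ALL_TESTS() to print one line per suite.
void SummarizeRun() {
  const ::testing::UnitTest& unit_test = *::testing::UnitTest::GetInstance();
  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
    const ::testing::TestSuite* suite = unit_test.GetTestSuite(i);
    std::printf("%s: %d/%d passed\n", suite->name(),
                suite->successful_test_count(), suite->test_to_run_count());
  }
}
```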
- -##### ad_hoc_test_result {#UnitTest::ad_hoc_test_result} - -`const TestResult& UnitTest::ad_hoc_test_result() const` - -Returns the [`TestResult`](#TestResult) containing information on test failures -and properties logged outside of individual test suites. - -##### listeners {#UnitTest::listeners} - -`TestEventListeners& UnitTest::listeners()` - -Returns the list of event listeners that can be used to track events inside -GoogleTest. See [`TestEventListeners`](#TestEventListeners). - -### TestEventListener {#TestEventListener} - -`::testing::TestEventListener` - -The interface for tracing execution of tests. The methods below are listed in -the order the corresponding events are fired. - -#### Public Methods {#TestEventListener-public} - -##### OnTestProgramStart {#TestEventListener::OnTestProgramStart} - -`virtual void TestEventListener::OnTestProgramStart(const UnitTest& unit_test)` - -Fired before any test activity starts. - -##### OnTestIterationStart {#TestEventListener::OnTestIterationStart} - -`virtual void TestEventListener::OnTestIterationStart(const UnitTest& unit_test, -int iteration)` - -Fired before each iteration of tests starts. There may be more than one -iteration if `GTEST_FLAG(repeat)` is set. `iteration` is the iteration index, -starting from 0. - -##### OnEnvironmentsSetUpStart {#TestEventListener::OnEnvironmentsSetUpStart} - -`virtual void TestEventListener::OnEnvironmentsSetUpStart(const UnitTest& -unit_test)` - -Fired before environment set-up for each iteration of tests starts. - -##### OnEnvironmentsSetUpEnd {#TestEventListener::OnEnvironmentsSetUpEnd} - -`virtual void TestEventListener::OnEnvironmentsSetUpEnd(const UnitTest& -unit_test)` - -Fired after environment set-up for each iteration of tests ends. - -##### OnTestSuiteStart {#TestEventListener::OnTestSuiteStart} - -`virtual void TestEventListener::OnTestSuiteStart(const TestSuite& test_suite)` - -Fired before the test suite starts. - -##### OnTestStart {#TestEventListener::OnTestStart} - -`virtual void TestEventListener::OnTestStart(const TestInfo& test_info)` - -Fired before the test starts. - -##### OnTestPartResult {#TestEventListener::OnTestPartResult} - -`virtual void TestEventListener::OnTestPartResult(const TestPartResult& -test_part_result)` - -Fired after a failed assertion or a `SUCCEED()` invocation. If you want to throw -an exception from this function to skip to the next test, it must be an -[`AssertionException`](#AssertionException) or inherited from it. - -##### OnTestEnd {#TestEventListener::OnTestEnd} - -`virtual void TestEventListener::OnTestEnd(const TestInfo& test_info)` - -Fired after the test ends. - -##### OnTestSuiteEnd {#TestEventListener::OnTestSuiteEnd} - -`virtual void TestEventListener::OnTestSuiteEnd(const TestSuite& test_suite)` - -Fired after the test suite ends. - -##### OnEnvironmentsTearDownStart {#TestEventListener::OnEnvironmentsTearDownStart} - -`virtual void TestEventListener::OnEnvironmentsTearDownStart(const UnitTest& -unit_test)` - -Fired before environment tear-down for each iteration of tests starts. - -##### OnEnvironmentsTearDownEnd {#TestEventListener::OnEnvironmentsTearDownEnd} - -`virtual void TestEventListener::OnEnvironmentsTearDownEnd(const UnitTest& -unit_test)` - -Fired after environment tear-down for each iteration of tests ends. - -##### OnTestIterationEnd {#TestEventListener::OnTestIterationEnd} - -`virtual void TestEventListener::OnTestIterationEnd(const UnitTest& unit_test, -int iteration)` - -Fired after each iteration of tests finishes. 
- -##### OnTestProgramEnd {#TestEventListener::OnTestProgramEnd} - -`virtual void TestEventListener::OnTestProgramEnd(const UnitTest& unit_test)` - -Fired after all test activities have ended. - -### TestEventListeners {#TestEventListeners} - -`::testing::TestEventListeners` - -Lets users add listeners to track events in GoogleTest. - -#### Public Methods {#TestEventListeners-public} - -##### Append {#TestEventListeners::Append} - -`void TestEventListeners::Append(TestEventListener* listener)` - -Appends an event listener to the end of the list. GoogleTest assumes ownership -of the listener (i.e. it will delete the listener when the test program -finishes). - -##### Release {#TestEventListeners::Release} - -`TestEventListener* TestEventListeners::Release(TestEventListener* listener)` - -Removes the given event listener from the list and returns it. It then becomes -the caller's responsibility to delete the listener. Returns `NULL` if the -listener is not found in the list. - -##### default_result_printer {#TestEventListeners::default_result_printer} - -`TestEventListener* TestEventListeners::default_result_printer() const` - -Returns the standard listener responsible for the default console output. Can be -removed from the listeners list to shut down default console output. Note that -removing this object from the listener list with -[`Release()`](#TestEventListeners::Release) transfers its ownership to the -caller and makes this function return `NULL` the next time. - -##### default_xml_generator {#TestEventListeners::default_xml_generator} - -`TestEventListener* TestEventListeners::default_xml_generator() const` - -Returns the standard listener responsible for the default XML output controlled -by the `--gtest_output=xml` flag. Can be removed from the listeners list by -users who want to shut down the default XML output controlled by this flag and -substitute it with custom one. Note that removing this object from the listener -list with [`Release()`](#TestEventListeners::Release) transfers its ownership to -the caller and makes this function return `NULL` the next time. - -### TestPartResult {#TestPartResult} - -`::testing::TestPartResult` - -A copyable object representing the result of a test part (i.e. an assertion or -an explicit `FAIL()`, `ADD_FAILURE()`, or `SUCCESS()`). - -#### Public Methods {#TestPartResult-public} - -##### type {#TestPartResult::type} - -`Type TestPartResult::type() const` - -Gets the outcome of the test part. - -The return type `Type` is an enum defined as follows: - -```cpp -enum Type { - kSuccess, // Succeeded. - kNonFatalFailure, // Failed but the test can continue. - kFatalFailure, // Failed and the test should be terminated. - kSkip // Skipped. -}; -``` - -##### file_name {#TestPartResult::file_name} - -`const char* TestPartResult::file_name() const` - -Gets the name of the source file where the test part took place, or `NULL` if -it's unknown. - -##### line_number {#TestPartResult::line_number} - -`int TestPartResult::line_number() const` - -Gets the line in the source file where the test part took place, or `-1` if it's -unknown. - -##### summary {#TestPartResult::summary} - -`const char* TestPartResult::summary() const` - -Gets the summary of the failure message. - -##### message {#TestPartResult::message} - -`const char* TestPartResult::message() const` - -Gets the message associated with the test part. - -##### skipped {#TestPartResult::skipped} - -`bool TestPartResult::skipped() const` - -Returns true if and only if the test part was skipped. 
- -##### passed {#TestPartResult::passed} - -`bool TestPartResult::passed() const` - -Returns true if and only if the test part passed. - -##### nonfatally_failed {#TestPartResult::nonfatally_failed} - -`bool TestPartResult::nonfatally_failed() const` - -Returns true if and only if the test part non-fatally failed. - -##### fatally_failed {#TestPartResult::fatally_failed} - -`bool TestPartResult::fatally_failed() const` - -Returns true if and only if the test part fatally failed. - -##### failed {#TestPartResult::failed} - -`bool TestPartResult::failed() const` - -Returns true if and only if the test part failed. - -### TestProperty {#TestProperty} - -`::testing::TestProperty` - -A copyable object representing a user-specified test property which can be -output as a key/value string pair. - -#### Public Methods {#TestProperty-public} - -##### key {#key} - -`const char* key() const` - -Gets the user-supplied key. - -##### value {#value} - -`const char* value() const` - -Gets the user-supplied value. - -##### SetValue {#SetValue} - -`void SetValue(const std::string& new_value)` - -Sets a new value, overriding the previous one. - -### TestResult {#TestResult} - -`::testing::TestResult` - -Contains information about the result of a single test. - -`TestResult` is not copyable. - -#### Public Methods {#TestResult-public} - -##### total_part_count {#TestResult::total_part_count} - -`int TestResult::total_part_count() const` - -Gets the number of all test parts. This is the sum of the number of successful -test parts and the number of failed test parts. - -##### test_property_count {#TestResult::test_property_count} - -`int TestResult::test_property_count() const` - -Returns the number of test properties. - -##### Passed {#TestResult::Passed} - -`bool TestResult::Passed() const` - -Returns true if and only if the test passed (i.e. no test part failed). - -##### Skipped {#TestResult::Skipped} - -`bool TestResult::Skipped() const` - -Returns true if and only if the test was skipped. - -##### Failed {#TestResult::Failed} - -`bool TestResult::Failed() const` - -Returns true if and only if the test failed. - -##### HasFatalFailure {#TestResult::HasFatalFailure} - -`bool TestResult::HasFatalFailure() const` - -Returns true if and only if the test fatally failed. - -##### HasNonfatalFailure {#TestResult::HasNonfatalFailure} - -`bool TestResult::HasNonfatalFailure() const` - -Returns true if and only if the test has a non-fatal failure. - -##### elapsed_time {#TestResult::elapsed_time} - -`TimeInMillis TestResult::elapsed_time() const` - -Returns the elapsed time, in milliseconds. - -##### start_timestamp {#TestResult::start_timestamp} - -`TimeInMillis TestResult::start_timestamp() const` - -Gets the time of the test case start, in ms from the start of the UNIX epoch. - -##### GetTestPartResult {#TestResult::GetTestPartResult} - -`const TestPartResult& TestResult::GetTestPartResult(int i) const` - -Returns the [`TestPartResult`](#TestPartResult) for the `i`-th test part result -among all the results. `i` can range from 0 to `total_part_count() - 1`. If `i` -is not in that range, aborts the program. - -##### GetTestProperty {#TestResult::GetTestProperty} - -`const TestProperty& TestResult::GetTestProperty(int i) const` - -Returns the [`TestProperty`](#TestProperty) object for the `i`-th test property. -`i` can range from 0 to `test_property_count() - 1`. If `i` is not in that -range, aborts the program. 
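The listener-related classes above can be combined as in the following sketch; the `FailurePrinter` class and its output format are hypothetical:

```cpp
#include <cstdio>

#include "gtest/gtest.h"

// Builds on EmptyTestEventListener so only the event of interest is overridden.
class FailurePrinter : public ::testing::EmptyTestEventListener {
 public:
  // Called after every assertion or explicit SUCCEED()/FAIL().
  void OnTestPartResult(const ::testing::TestPartResult& result) override {
    if (result.failed()) {
      std::printf("failure at %s:%d: %s\n",
                  result.file_name() ? result.file_name() : "<unknown>",
                  result.line_number(), result.summary());
    }
  }
};

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  // GoogleTest takes ownership of listeners appended to the list.
  ::testing::UnitTest::GetInstance()->listeners().Append(new FailurePrinter);
  return RUN_ALL_TESTS();
}
```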
- -### TimeInMillis {#TimeInMillis} - -`::testing::TimeInMillis` - -An integer type representing time in milliseconds. - -### Types {#Types} - -`::testing::Types` - -Represents a list of types for use in typed tests and type-parameterized tests. - -The template argument `T...` can be any number of types, for example: - -``` -::testing::Types -``` - -See [Typed Tests](../advanced.md#typed-tests) and -[Type-Parameterized Tests](../advanced.md#type-parameterized-tests) for more -information. - -### WithParamInterface {#WithParamInterface} - -`::testing::WithParamInterface` - -The pure interface class that all value-parameterized tests inherit from. - -A value-parameterized test fixture class must inherit from both [`Test`](#Test) -and `WithParamInterface`. In most cases that just means inheriting from -[`TestWithParam`](#TestWithParam), but more complicated test hierarchies may -need to inherit from `Test` and `WithParamInterface` at different levels. - -This interface defines the type alias `ParamType` for the parameter type `T` and -has support for accessing the test parameter value via the `GetParam()` method: - -``` -static const ParamType& GetParam() -``` - -For more information, see -[Value-Parameterized Tests](../advanced.md#value-parameterized-tests). - -## Functions - -GoogleTest defines the following functions to help with writing and running -tests. - -### InitGoogleTest {#InitGoogleTest} - -`void ::testing::InitGoogleTest(int* argc, char** argv)` \ -`void ::testing::InitGoogleTest(int* argc, wchar_t** argv)` \ -`void ::testing::InitGoogleTest()` - -Initializes GoogleTest. This must be called before calling -[`RUN_ALL_TESTS()`](#RUN_ALL_TESTS). In particular, it parses the command line -for the flags that GoogleTest recognizes. Whenever a GoogleTest flag is seen, it -is removed from `argv`, and `*argc` is decremented. - -No value is returned. Instead, the GoogleTest flag variables are updated. - -The `InitGoogleTest(int* argc, wchar_t** argv)` overload can be used in Windows -programs compiled in `UNICODE` mode. - -The argument-less `InitGoogleTest()` overload can be used on Arduino/embedded -platforms where there is no `argc`/`argv`. - -### AddGlobalTestEnvironment {#AddGlobalTestEnvironment} - -`Environment* ::testing::AddGlobalTestEnvironment(Environment* env)` - -Adds a test environment to the test program. Must be called before -[`RUN_ALL_TESTS()`](#RUN_ALL_TESTS) is called. See -[Global Set-Up and Tear-Down](../advanced.md#global-set-up-and-tear-down) for -more information. - -See also [`Environment`](#Environment). - -### RegisterTest {#RegisterTest} - -```cpp -template -TestInfo* ::testing::RegisterTest(const char* test_suite_name, const char* test_name, - const char* type_param, const char* value_param, - const char* file, int line, Factory factory) -``` - -Dynamically registers a test with the framework. - -The `factory` argument is a factory callable (move-constructible) object or -function pointer that creates a new instance of the `Test` object. It handles -ownership to the caller. The signature of the callable is `Fixture*()`, where -`Fixture` is the test fixture class for the test. All tests registered with the -same `test_suite_name` must return the same fixture type. This is checked at -runtime. - -The framework will infer the fixture class from the factory and will call the -`SetUpTestSuite` and `TearDownTestSuite` methods for it. - -Must be called before [`RUN_ALL_TESTS()`](#RUN_ALL_TESTS) is invoked, otherwise -behavior is undefined. 
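A minimal sketch of dynamic registration following the signature above; the fixture `DynamicFixture`, the derived `IsPositiveTest`, and the registered values are hypothetical:

```cpp
#include <string>
#include <vector>

#include "gtest/gtest.h"

// Hypothetical fixture; all tests registered under its suite name must use it.
class DynamicFixture : public ::testing::Test {};

class IsPositiveTest : public DynamicFixture {
 public:
  explicit IsPositiveTest(int value) : value_(value) {}
  void TestBody() override { EXPECT_GT(value_, 0); }

 private:
  int value_;
};

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const std::vector<int> values = {1, 2, 3};
  for (int v : values) {
    ::testing::RegisterTest(
        "DynamicFixture", ("IsPositive_" + std::to_string(v)).c_str(),
        /*type_param=*/nullptr, std::to_string(v).c_str(), __FILE__, __LINE__,
        // The factory must return the fixture type; the framework owns the
        // returned object.
        [=]() -> DynamicFixture* { return new IsPositiveTest(v); });
  }
  return RUN_ALL_TESTS();
}
```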
- -See -[Registering tests programmatically](../advanced.md#registering-tests-programmatically) -for more information. - -### RUN_ALL_TESTS {#RUN_ALL_TESTS} - -`int RUN_ALL_TESTS()` - -Use this function in `main()` to run all tests. It returns `0` if all tests are -successful, or `1` otherwise. - -`RUN_ALL_TESTS()` should be invoked after the command line has been parsed by -[`InitGoogleTest()`](#InitGoogleTest). - -This function was formerly a macro; thus, it is in the global namespace and has -an all-caps name. - -### AssertionSuccess {#AssertionSuccess} - -`AssertionResult ::testing::AssertionSuccess()` - -Creates a successful assertion result. See -[`AssertionResult`](#AssertionResult). - -### AssertionFailure {#AssertionFailure} - -`AssertionResult ::testing::AssertionFailure()` - -Creates a failed assertion result. Use the `<<` operator to store a failure -message: - -```cpp -::testing::AssertionFailure() << "My failure message"; -``` - -See [`AssertionResult`](#AssertionResult). - -### StaticAssertTypeEq {#StaticAssertTypeEq} - -`::testing::StaticAssertTypeEq()` - -Compile-time assertion for type equality. Compiles if and only if `T1` and `T2` -are the same type. The value it returns is irrelevant. - -See [Type Assertions](../advanced.md#type-assertions) for more information. - -### PrintToString {#PrintToString} - -`std::string ::testing::PrintToString(x)` - -Prints any value `x` using GoogleTest's value printer. - -See -[Teaching GoogleTest How to Print Your Values](../advanced.md#teaching-googletest-how-to-print-your-values) -for more information. - -### PrintToStringParamName {#PrintToStringParamName} - -`std::string ::testing::PrintToStringParamName(TestParamInfo& info)` - -A built-in parameterized test name generator which returns the result of -[`PrintToString`](#PrintToString) called on `info.param`. Does not work when the -test parameter is a `std::string` or C string. See -[Specifying Names for Value-Parameterized Test Parameters](../advanced.md#specifying-names-for-value-parameterized-test-parameters) -for more information. - -See also [`TestParamInfo`](#TestParamInfo) and -[`INSTANTIATE_TEST_SUITE_P`](#INSTANTIATE_TEST_SUITE_P). diff --git a/3rdparty/googletest-1.13.0/docs/samples.md b/3rdparty/googletest-1.13.0/docs/samples.md deleted file mode 100644 index dedc59098df5ae6a318bae5992694dc11b6daf62..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/docs/samples.md +++ /dev/null @@ -1,22 +0,0 @@ -# Googletest Samples - -If you're like us, you'd like to look at -[googletest samples.](https://github.com/google/googletest/blob/main/googletest/samples) -The sample directory has a number of well-commented samples showing how to use a -variety of googletest features. - -* Sample #1 shows the basic steps of using googletest to test C++ functions. -* Sample #2 shows a more complex unit test for a class with multiple member - functions. -* Sample #3 uses a test fixture. -* Sample #4 teaches you how to use googletest and `googletest.h` together to - get the best of both libraries. -* Sample #5 puts shared testing logic in a base test fixture, and reuses it in - derived fixtures. -* Sample #6 demonstrates type-parameterized tests. -* Sample #7 teaches the basics of value-parameterized tests. -* Sample #8 shows using `Combine()` in value-parameterized tests. -* Sample #9 shows use of the listener API to modify Google Test's console - output and the use of its reflection API to inspect test results. 
-* Sample #10 shows use of the listener API to implement a primitive memory - leak checker. diff --git a/3rdparty/googletest-1.13.0/googlemock/README.md b/3rdparty/googletest-1.13.0/googlemock/README.md deleted file mode 100644 index 7da60655dba8b8e91ec66a9a65f97139af03ee9b..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# Googletest Mocking (gMock) Framework - -### Overview - -Google's framework for writing and using C++ mock classes. It can help you -derive better designs of your system and write better tests. - -It is inspired by: - -* [jMock](http://www.jmock.org/) -* [EasyMock](http://www.easymock.org/) -* [Hamcrest](http://code.google.com/p/hamcrest/) - -It is designed with C++'s specifics in mind. - -gMock: - -- Provides a declarative syntax for defining mocks. -- Can define partial (hybrid) mocks, which are a cross of real and mock - objects. -- Handles functions of arbitrary types and overloaded functions. -- Comes with a rich set of matchers for validating function arguments. -- Uses an intuitive syntax for controlling the behavior of a mock. -- Does automatic verification of expectations (no record-and-replay needed). -- Allows arbitrary (partial) ordering constraints on function calls to be - expressed. -- Lets a user extend it by defining new matchers and actions. -- Does not use exceptions. -- Is easy to learn and use. - -Details and examples can be found here: - -* [gMock for Dummies](https://google.github.io/googletest/gmock_for_dummies.html) -* [Legacy gMock FAQ](https://google.github.io/googletest/gmock_faq.html) -* [gMock Cookbook](https://google.github.io/googletest/gmock_cook_book.html) -* [gMock Cheat Sheet](https://google.github.io/googletest/gmock_cheat_sheet.html) - -GoogleMock is a part of -[GoogleTest C++ testing framework](http://github.com/google/googletest/) and a -subject to the same requirements. 
diff --git a/3rdparty/googletest-1.13.0/googlemock/cmake/gmock.pc.in b/3rdparty/googletest-1.13.0/googlemock/cmake/gmock.pc.in deleted file mode 100644 index 23c67b5c88db4add6d21403b8ecbaf1be5a88813..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/cmake/gmock.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -libdir=@CMAKE_INSTALL_FULL_LIBDIR@ -includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ - -Name: gmock -Description: GoogleMock (without main() function) -Version: @PROJECT_VERSION@ -URL: https://github.com/google/googletest -Requires: gtest = @PROJECT_VERSION@ -Libs: -L${libdir} -lgmock @CMAKE_THREAD_LIBS_INIT@ -Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ diff --git a/3rdparty/googletest-1.13.0/googlemock/cmake/gmock_main.pc.in b/3rdparty/googletest-1.13.0/googlemock/cmake/gmock_main.pc.in deleted file mode 100644 index 66ffea7f4431f606c5ca5d87bef505157658244d..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/cmake/gmock_main.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -libdir=@CMAKE_INSTALL_FULL_LIBDIR@ -includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ - -Name: gmock_main -Description: GoogleMock (with main() function) -Version: @PROJECT_VERSION@ -URL: https://github.com/google/googletest -Requires: gmock = @PROJECT_VERSION@ -Libs: -L${libdir} -lgmock_main @CMAKE_THREAD_LIBS_INIT@ -Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ diff --git a/3rdparty/googletest-1.13.0/googlemock/docs/README.md b/3rdparty/googletest-1.13.0/googlemock/docs/README.md deleted file mode 100644 index 1bc57b799cce933c034c31859594ca1b87689aef..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/docs/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Content Moved - -We are working on updates to the GoogleTest documentation, which has moved to -the top-level [docs](../../docs) directory. diff --git a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-actions.h b/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-actions.h deleted file mode 100644 index aad07d51cc1a26f0b55d12633c374b45b2eff49d..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-actions.h +++ /dev/null @@ -1,2302 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Google Mock - a framework for writing C++ mock classes. -// -// The ACTION* family of macros can be used in a namespace scope to -// define custom actions easily. The syntax: -// -// ACTION(name) { statements; } -// -// will define an action with the given name that executes the -// statements. The value returned by the statements will be used as -// the return value of the action. Inside the statements, you can -// refer to the K-th (0-based) argument of the mock function by -// 'argK', and refer to its type by 'argK_type'. For example: -// -// ACTION(IncrementArg1) { -// arg1_type temp = arg1; -// return ++(*temp); -// } -// -// allows you to write -// -// ...WillOnce(IncrementArg1()); -// -// You can also refer to the entire argument tuple and its type by -// 'args' and 'args_type', and refer to the mock function type and its -// return type by 'function_type' and 'return_type'. -// -// Note that you don't need to specify the types of the mock function -// arguments. However rest assured that your code is still type-safe: -// you'll get a compiler error if *arg1 doesn't support the ++ -// operator, or if the type of ++(*arg1) isn't compatible with the -// mock function's return type, for example. -// -// Sometimes you'll want to parameterize the action. For that you can use -// another macro: -// -// ACTION_P(name, param_name) { statements; } -// -// For example: -// -// ACTION_P(Add, n) { return arg0 + n; } -// -// will allow you to write: -// -// ...WillOnce(Add(5)); -// -// Note that you don't need to provide the type of the parameter -// either. If you need to reference the type of a parameter named -// 'foo', you can write 'foo_type'. For example, in the body of -// ACTION_P(Add, n) above, you can write 'n_type' to refer to the type -// of 'n'. -// -// We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P10 to support -// multi-parameter actions. -// -// For the purpose of typing, you can view -// -// ACTION_Pk(Foo, p1, ..., pk) { ... } -// -// as shorthand for -// -// template -// FooActionPk Foo(p1_type p1, ..., pk_type pk) { ... } -// -// In particular, you can provide the template type arguments -// explicitly when invoking Foo(), as in Foo(5, false); -// although usually you can rely on the compiler to infer the types -// for you automatically. You can assign the result of expression -// Foo(p1, ..., pk) to a variable of type FooActionPk. This can be useful when composing actions. -// -// You can also overload actions with different numbers of parameters: -// -// ACTION_P(Plus, a) { ... } -// ACTION_P2(Plus, a, b) { ... } -// -// While it's tempting to always use the ACTION* macros when defining -// a new action, you should also consider implementing ActionInterface -// or using MakePolymorphicAction() instead, especially if you need to -// use the action a lot. 
While these approaches require more work, -// they give you more control on the types of the mock function -// arguments and the action parameters, which in general leads to -// better compiler error messages that pay off in the long run. They -// also allow overloading actions based on parameter types (as opposed -// to just based on the number of parameters). -// -// CAVEAT: -// -// ACTION*() can only be used in a namespace scope as templates cannot be -// declared inside of a local class. -// Users can, however, define any local functors (e.g. a lambda) that -// can be used as actions. -// -// MORE INFORMATION: -// -// To learn more about using these macros, please search for 'ACTION' on -// https://github.com/google/googletest/blob/main/docs/gmock_cook_book.md - -// IWYU pragma: private, include "gmock/gmock.h" -// IWYU pragma: friend gmock/.* - -#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ -#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ - -#ifndef _WIN32_WCE -#include -#endif - -#include -#include -#include -#include -#include -#include -#include - -#include "gmock/internal/gmock-internal-utils.h" -#include "gmock/internal/gmock-port.h" -#include "gmock/internal/gmock-pp.h" - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4100) -#endif - -namespace testing { - -// To implement an action Foo, define: -// 1. a class FooAction that implements the ActionInterface interface, and -// 2. a factory function that creates an Action object from a -// const FooAction*. -// -// The two-level delegation design follows that of Matcher, providing -// consistency for extension developers. It also eases ownership -// management as Action objects can now be copied like plain values. - -namespace internal { - -// BuiltInDefaultValueGetter::Get() returns a -// default-constructed T value. BuiltInDefaultValueGetter::Get() crashes with an error. -// -// This primary template is used when kDefaultConstructible is true. -template -struct BuiltInDefaultValueGetter { - static T Get() { return T(); } -}; -template -struct BuiltInDefaultValueGetter { - static T Get() { - Assert(false, __FILE__, __LINE__, - "Default action undefined for the function return type."); - return internal::Invalid(); - // The above statement will never be reached, but is required in - // order for this function to compile. - } -}; - -// BuiltInDefaultValue::Get() returns the "built-in" default value -// for type T, which is NULL when T is a raw pointer type, 0 when T is -// a numeric type, false when T is bool, or "" when T is string or -// std::string. In addition, in C++11 and above, it turns a -// default-constructed T value if T is default constructible. For any -// other type T, the built-in default T value is undefined, and the -// function will abort the process. -template -class BuiltInDefaultValue { - public: - // This function returns true if and only if type T has a built-in default - // value. - static bool Exists() { return ::std::is_default_constructible::value; } - - static T Get() { - return BuiltInDefaultValueGetter< - T, ::std::is_default_constructible::value>::Get(); - } -}; - -// This partial specialization says that we use the same built-in -// default value for T and const T. -template -class BuiltInDefaultValue { - public: - static bool Exists() { return BuiltInDefaultValue::Exists(); } - static T Get() { return BuiltInDefaultValue::Get(); } -}; - -// This partial specialization defines the default values for pointer -// types. 
-template -class BuiltInDefaultValue { - public: - static bool Exists() { return true; } - static T* Get() { return nullptr; } -}; - -// The following specializations define the default values for -// specific types we care about. -#define GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(type, value) \ - template <> \ - class BuiltInDefaultValue { \ - public: \ - static bool Exists() { return true; } \ - static type Get() { return value; } \ - } - -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(void, ); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(::std::string, ""); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(bool, false); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned char, '\0'); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed char, '\0'); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(char, '\0'); - -// There's no need for a default action for signed wchar_t, as that -// type is the same as wchar_t for gcc, and invalid for MSVC. -// -// There's also no need for a default action for unsigned wchar_t, as -// that type is the same as unsigned int for gcc, and invalid for -// MSVC. -#if GMOCK_WCHAR_T_IS_NATIVE_ -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(wchar_t, 0U); // NOLINT -#endif - -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned short, 0U); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed short, 0); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned int, 0U); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed int, 0); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long, 0UL); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long, 0L); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long long, 0); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long long, 0); // NOLINT -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(float, 0); -GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(double, 0); - -#undef GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_ - -// Partial implementations of metaprogramming types from the standard library -// not available in C++11. - -template -struct negation - // NOLINTNEXTLINE - : std::integral_constant {}; - -// Base case: with zero predicates the answer is always true. -template -struct conjunction : std::true_type {}; - -// With a single predicate, the answer is that predicate. -template -struct conjunction : P1 {}; - -// With multiple predicates the answer is the first predicate if that is false, -// and we recurse otherwise. -template -struct conjunction - : std::conditional, P1>::type {}; - -template -struct disjunction : std::false_type {}; - -template -struct disjunction : P1 {}; - -template -struct disjunction - // NOLINTNEXTLINE - : std::conditional, P1>::type {}; - -template -using void_t = void; - -// Detects whether an expression of type `From` can be implicitly converted to -// `To` according to [conv]. In C++17, [conv]/3 defines this as follows: -// -// An expression e can be implicitly converted to a type T if and only if -// the declaration T t=e; is well-formed, for some invented temporary -// variable t ([dcl.init]). -// -// [conv]/2 implies we can use function argument passing to detect whether this -// initialization is valid. 
-// -// Note that this is distinct from is_convertible, which requires this be valid: -// -// To test() { -// return declval(); -// } -// -// In particular, is_convertible doesn't give the correct answer when `To` and -// `From` are the same non-moveable type since `declval` will be an rvalue -// reference, defeating the guaranteed copy elision that would otherwise make -// this function work. -// -// REQUIRES: `From` is not cv void. -template -struct is_implicitly_convertible { - private: - // A function that accepts a parameter of type T. This can be called with type - // U successfully only if U is implicitly convertible to T. - template - static void Accept(T); - - // A function that creates a value of type T. - template - static T Make(); - - // An overload be selected when implicit conversion from T to To is possible. - template (Make()))> - static std::true_type TestImplicitConversion(int); - - // A fallback overload selected in all other cases. - template - static std::false_type TestImplicitConversion(...); - - public: - using type = decltype(TestImplicitConversion(0)); - static constexpr bool value = type::value; -}; - -// Like std::invoke_result_t from C++17, but works only for objects with call -// operators (not e.g. member function pointers, which we don't need specific -// support for in OnceAction because std::function deals with them). -template -using call_result_t = decltype(std::declval()(std::declval()...)); - -template -struct is_callable_r_impl : std::false_type {}; - -// Specialize the struct for those template arguments where call_result_t is -// well-formed. When it's not, the generic template above is chosen, resulting -// in std::false_type. -template -struct is_callable_r_impl>, R, F, Args...> - : std::conditional< - std::is_void::value, // - std::true_type, // - is_implicitly_convertible, R>>::type {}; - -// Like std::is_invocable_r from C++17, but works only for objects with call -// operators. See the note on call_result_t. -template -using is_callable_r = is_callable_r_impl; - -// Like std::as_const from C++17. -template -typename std::add_const::type& as_const(T& t) { - return t; -} - -} // namespace internal - -// Specialized for function types below. -template -class OnceAction; - -// An action that can only be used once. -// -// This is accepted by WillOnce, which doesn't require the underlying action to -// be copy-constructible (only move-constructible), and promises to invoke it as -// an rvalue reference. This allows the action to work with move-only types like -// std::move_only_function in a type-safe manner. -// -// For example: -// -// // Assume we have some API that needs to accept a unique pointer to some -// // non-copyable object Foo. -// void AcceptUniquePointer(std::unique_ptr foo); -// -// // We can define an action that provides a Foo to that API. Because It -// // has to give away its unique pointer, it must not be called more than -// // once, so its call operator is &&-qualified. -// struct ProvideFoo { -// std::unique_ptr foo; -// -// void operator()() && { -// AcceptUniquePointer(std::move(Foo)); -// } -// }; -// -// // This action can be used with WillOnce. -// EXPECT_CALL(mock, Call) -// .WillOnce(ProvideFoo{std::make_unique(...)}); -// -// // But a call to WillRepeatedly will fail to compile. This is correct, -// // since the action cannot correctly be used repeatedly. 
-// EXPECT_CALL(mock, Call) -// .WillRepeatedly(ProvideFoo{std::make_unique(...)}); -// -// A less-contrived example would be an action that returns an arbitrary type, -// whose &&-qualified call operator is capable of dealing with move-only types. -template -class OnceAction final { - private: - // True iff we can use the given callable type (or lvalue reference) directly - // via StdFunctionAdaptor. - template - using IsDirectlyCompatible = internal::conjunction< - // It must be possible to capture the callable in StdFunctionAdaptor. - std::is_constructible::type, Callable>, - // The callable must be compatible with our signature. - internal::is_callable_r::type, - Args...>>; - - // True iff we can use the given callable type via StdFunctionAdaptor once we - // ignore incoming arguments. - template - using IsCompatibleAfterIgnoringArguments = internal::conjunction< - // It must be possible to capture the callable in a lambda. - std::is_constructible::type, Callable>, - // The callable must be invocable with zero arguments, returning something - // convertible to Result. - internal::is_callable_r::type>>; - - public: - // Construct from a callable that is directly compatible with our mocked - // signature: it accepts our function type's arguments and returns something - // convertible to our result type. - template ::type>>, - IsDirectlyCompatible> // - ::value, - int>::type = 0> - OnceAction(Callable&& callable) // NOLINT - : function_(StdFunctionAdaptor::type>( - {}, std::forward(callable))) {} - - // As above, but for a callable that ignores the mocked function's arguments. - template ::type>>, - // Exclude callables for which the overload above works. - // We'd rather provide the arguments if possible. - internal::negation>, - IsCompatibleAfterIgnoringArguments>::value, - int>::type = 0> - OnceAction(Callable&& callable) // NOLINT - // Call the constructor above with a callable - // that ignores the input arguments. - : OnceAction(IgnoreIncomingArguments::type>{ - std::forward(callable)}) {} - - // We are naturally copyable because we store only an std::function, but - // semantically we should not be copyable. - OnceAction(const OnceAction&) = delete; - OnceAction& operator=(const OnceAction&) = delete; - OnceAction(OnceAction&&) = default; - - // Invoke the underlying action callable with which we were constructed, - // handing it the supplied arguments. - Result Call(Args... args) && { - return function_(std::forward(args)...); - } - - private: - // An adaptor that wraps a callable that is compatible with our signature and - // being invoked as an rvalue reference so that it can be used as an - // StdFunctionAdaptor. This throws away type safety, but that's fine because - // this is only used by WillOnce, which we know calls at most once. - // - // Once we have something like std::move_only_function from C++23, we can do - // away with this. - template - class StdFunctionAdaptor final { - public: - // A tag indicating that the (otherwise universal) constructor is accepting - // the callable itself, instead of e.g. stealing calls for the move - // constructor. - struct CallableTag final {}; - - template - explicit StdFunctionAdaptor(CallableTag, F&& callable) - : callable_(std::make_shared(std::forward(callable))) {} - - // Rather than explicitly returning Result, we return whatever the wrapped - // callable returns. 
This allows for compatibility with existing uses like - // the following, when the mocked function returns void: - // - // EXPECT_CALL(mock_fn_, Call) - // .WillOnce([&] { - // [...] - // return 0; - // }); - // - // Such a callable can be turned into std::function. If we use an - // explicit return type of Result here then it *doesn't* work with - // std::function, because we'll get a "void function should not return a - // value" error. - // - // We need not worry about incompatible result types because the SFINAE on - // OnceAction already checks this for us. std::is_invocable_r_v itself makes - // the same allowance for void result types. - template - internal::call_result_t operator()( - ArgRefs&&... args) const { - return std::move(*callable_)(std::forward(args)...); - } - - private: - // We must put the callable on the heap so that we are copyable, which - // std::function needs. - std::shared_ptr callable_; - }; - - // An adaptor that makes a callable that accepts zero arguments callable with - // our mocked arguments. - template - struct IgnoreIncomingArguments { - internal::call_result_t operator()(Args&&...) { - return std::move(callable)(); - } - - Callable callable; - }; - - std::function function_; -}; - -// When an unexpected function call is encountered, Google Mock will -// let it return a default value if the user has specified one for its -// return type, or if the return type has a built-in default value; -// otherwise Google Mock won't know what value to return and will have -// to abort the process. -// -// The DefaultValue class allows a user to specify the -// default value for a type T that is both copyable and publicly -// destructible (i.e. anything that can be used as a function return -// type). The usage is: -// -// // Sets the default value for type T to be foo. -// DefaultValue::Set(foo); -template -class DefaultValue { - public: - // Sets the default value for type T; requires T to be - // copy-constructable and have a public destructor. - static void Set(T x) { - delete producer_; - producer_ = new FixedValueProducer(x); - } - - // Provides a factory function to be called to generate the default value. - // This method can be used even if T is only move-constructible, but it is not - // limited to that case. - typedef T (*FactoryFunction)(); - static void SetFactory(FactoryFunction factory) { - delete producer_; - producer_ = new FactoryValueProducer(factory); - } - - // Unsets the default value for type T. - static void Clear() { - delete producer_; - producer_ = nullptr; - } - - // Returns true if and only if the user has set the default value for type T. - static bool IsSet() { return producer_ != nullptr; } - - // Returns true if T has a default return value set by the user or there - // exists a built-in default value. - static bool Exists() { - return IsSet() || internal::BuiltInDefaultValue::Exists(); - } - - // Returns the default value for type T if the user has set one; - // otherwise returns the built-in default value. Requires that Exists() - // is true, which ensures that the return value is well-defined. - static T Get() { - return producer_ == nullptr ? 
internal::BuiltInDefaultValue::Get() - : producer_->Produce(); - } - - private: - class ValueProducer { - public: - virtual ~ValueProducer() {} - virtual T Produce() = 0; - }; - - class FixedValueProducer : public ValueProducer { - public: - explicit FixedValueProducer(T value) : value_(value) {} - T Produce() override { return value_; } - - private: - const T value_; - FixedValueProducer(const FixedValueProducer&) = delete; - FixedValueProducer& operator=(const FixedValueProducer&) = delete; - }; - - class FactoryValueProducer : public ValueProducer { - public: - explicit FactoryValueProducer(FactoryFunction factory) - : factory_(factory) {} - T Produce() override { return factory_(); } - - private: - const FactoryFunction factory_; - FactoryValueProducer(const FactoryValueProducer&) = delete; - FactoryValueProducer& operator=(const FactoryValueProducer&) = delete; - }; - - static ValueProducer* producer_; -}; - -// This partial specialization allows a user to set default values for -// reference types. -template -class DefaultValue { - public: - // Sets the default value for type T&. - static void Set(T& x) { // NOLINT - address_ = &x; - } - - // Unsets the default value for type T&. - static void Clear() { address_ = nullptr; } - - // Returns true if and only if the user has set the default value for type T&. - static bool IsSet() { return address_ != nullptr; } - - // Returns true if T has a default return value set by the user or there - // exists a built-in default value. - static bool Exists() { - return IsSet() || internal::BuiltInDefaultValue::Exists(); - } - - // Returns the default value for type T& if the user has set one; - // otherwise returns the built-in default value if there is one; - // otherwise aborts the process. - static T& Get() { - return address_ == nullptr ? internal::BuiltInDefaultValue::Get() - : *address_; - } - - private: - static T* address_; -}; - -// This specialization allows DefaultValue::Get() to -// compile. -template <> -class DefaultValue { - public: - static bool Exists() { return true; } - static void Get() {} -}; - -// Points to the user-set default value for type T. -template -typename DefaultValue::ValueProducer* DefaultValue::producer_ = nullptr; - -// Points to the user-set default value for type T&. -template -T* DefaultValue::address_ = nullptr; - -// Implement this interface to define an action for function type F. -template -class ActionInterface { - public: - typedef typename internal::Function::Result Result; - typedef typename internal::Function::ArgumentTuple ArgumentTuple; - - ActionInterface() {} - virtual ~ActionInterface() {} - - // Performs the action. This method is not const, as in general an - // action can have side effects and be stateful. For example, a - // get-the-next-element-from-the-collection action will need to - // remember the current element. - virtual Result Perform(const ArgumentTuple& args) = 0; - - private: - ActionInterface(const ActionInterface&) = delete; - ActionInterface& operator=(const ActionInterface&) = delete; -}; - -template -class Action; - -// An Action is a copyable and IMMUTABLE (except by assignment) -// object that represents an action to be taken when a mock function of type -// R(Args...) is called. The implementation of Action is just a -// std::shared_ptr to const ActionInterface. Don't inherit from Action! You -// can view an object implementing ActionInterface as a concrete action -// (including its current state), and an Action object as a handle to it. 
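To make the handle semantics described above concrete, the following is a minimal usage sketch, assuming only that gMock is available; the signature and values are invented for illustration. An Action can be built straight from any compatible callable and exercised via Perform(), which takes the call arguments packed into a tuple:

```cpp
#include <tuple>

#include "gmock/gmock.h"

int main() {
  // Build an Action for the signature int(int, int) directly from a lambda;
  // the Action object is a cheap, copyable handle to the stored callable.
  testing::Action<int(int, int)> add = [](int a, int b) { return a + b; };

  // Perform() receives the arguments as a tuple and runs the callable.
  const int sum = add.Perform(std::make_tuple(2, 3));
  return sum == 5 ? 0 : 1;
}
```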
-template -class Action { - private: - using F = R(Args...); - - // Adapter class to allow constructing Action from a legacy ActionInterface. - // New code should create Actions from functors instead. - struct ActionAdapter { - // Adapter must be copyable to satisfy std::function requirements. - ::std::shared_ptr> impl_; - - template - typename internal::Function::Result operator()(InArgs&&... args) { - return impl_->Perform( - ::std::forward_as_tuple(::std::forward(args)...)); - } - }; - - template - using IsCompatibleFunctor = std::is_constructible, G>; - - public: - typedef typename internal::Function::Result Result; - typedef typename internal::Function::ArgumentTuple ArgumentTuple; - - // Constructs a null Action. Needed for storing Action objects in - // STL containers. - Action() {} - - // Construct an Action from a specified callable. - // This cannot take std::function directly, because then Action would not be - // directly constructible from lambda (it would require two conversions). - template < - typename G, - typename = typename std::enable_if, std::is_constructible, - G>>::value>::type> - Action(G&& fun) { // NOLINT - Init(::std::forward(fun), IsCompatibleFunctor()); - } - - // Constructs an Action from its implementation. - explicit Action(ActionInterface* impl) - : fun_(ActionAdapter{::std::shared_ptr>(impl)}) {} - - // This constructor allows us to turn an Action object into an - // Action, as long as F's arguments can be implicitly converted - // to Func's and Func's return type can be implicitly converted to F's. - template - Action(const Action& action) // NOLINT - : fun_(action.fun_) {} - - // Returns true if and only if this is the DoDefault() action. - bool IsDoDefault() const { return fun_ == nullptr; } - - // Performs the action. Note that this method is const even though - // the corresponding method in ActionInterface is not. The reason - // is that a const Action means that it cannot be re-bound to - // another concrete action, not that the concrete action it binds to - // cannot change state. (Think of the difference between a const - // pointer and a pointer to const.) - Result Perform(ArgumentTuple args) const { - if (IsDoDefault()) { - internal::IllegalDoDefault(__FILE__, __LINE__); - } - return internal::Apply(fun_, ::std::move(args)); - } - - // An action can be used as a OnceAction, since it's obviously safe to call it - // once. - operator OnceAction() const { // NOLINT - // Return a OnceAction-compatible callable that calls Perform with the - // arguments it is provided. We could instead just return fun_, but then - // we'd need to handle the IsDoDefault() case separately. - struct OA { - Action action; - - R operator()(Args... args) && { - return action.Perform( - std::forward_as_tuple(std::forward(args)...)); - } - }; - - return OA{*this}; - } - - private: - template - friend class Action; - - template - void Init(G&& g, ::std::true_type) { - fun_ = ::std::forward(g); - } - - template - void Init(G&& g, ::std::false_type) { - fun_ = IgnoreArgs::type>{::std::forward(g)}; - } - - template - struct IgnoreArgs { - template - Result operator()(const InArgs&...) const { - return function_impl(); - } - - FunctionImpl function_impl; - }; - - // fun_ is an empty function if and only if this is the DoDefault() action. - ::std::function fun_; -}; - -// The PolymorphicAction class template makes it easy to implement a -// polymorphic action (i.e. an action that can be used in mock -// functions of than one type, e.g. Return()). 
-// -// To define a polymorphic action, a user first provides a COPYABLE -// implementation class that has a Perform() method template: -// -// class FooAction { -// public: -// template -// Result Perform(const ArgumentTuple& args) const { -// // Processes the arguments and returns a result, using -// // std::get(args) to get the N-th (0-based) argument in the tuple. -// } -// ... -// }; -// -// Then the user creates the polymorphic action using -// MakePolymorphicAction(object) where object has type FooAction. See -// the definition of Return(void) and SetArgumentPointee(value) for -// complete examples. -template -class PolymorphicAction { - public: - explicit PolymorphicAction(const Impl& impl) : impl_(impl) {} - - template - operator Action() const { - return Action(new MonomorphicImpl(impl_)); - } - - private: - template - class MonomorphicImpl : public ActionInterface { - public: - typedef typename internal::Function::Result Result; - typedef typename internal::Function::ArgumentTuple ArgumentTuple; - - explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {} - - Result Perform(const ArgumentTuple& args) override { - return impl_.template Perform(args); - } - - private: - Impl impl_; - }; - - Impl impl_; -}; - -// Creates an Action from its implementation and returns it. The -// created Action object owns the implementation. -template -Action MakeAction(ActionInterface* impl) { - return Action(impl); -} - -// Creates a polymorphic action from its implementation. This is -// easier to use than the PolymorphicAction constructor as it -// doesn't require you to explicitly write the template argument, e.g. -// -// MakePolymorphicAction(foo); -// vs -// PolymorphicAction(foo); -template -inline PolymorphicAction MakePolymorphicAction(const Impl& impl) { - return PolymorphicAction(impl); -} - -namespace internal { - -// Helper struct to specialize ReturnAction to execute a move instead of a copy -// on return. Useful for move-only types, but could be used on any type. -template -struct ByMoveWrapper { - explicit ByMoveWrapper(T value) : payload(std::move(value)) {} - T payload; -}; - -// The general implementation of Return(R). Specializations follow below. -template -class ReturnAction final { - public: - explicit ReturnAction(R value) : value_(std::move(value)) {} - - template >, // - negation>, // - std::is_convertible, // - std::is_move_constructible>::value>::type> - operator OnceAction() && { // NOLINT - return Impl(std::move(value_)); - } - - template >, // - negation>, // - std::is_convertible, // - std::is_copy_constructible>::value>::type> - operator Action() const { // NOLINT - return Impl(value_); - } - - private: - // Implements the Return(x) action for a mock function that returns type U. - template - class Impl final { - public: - // The constructor used when the return value is allowed to move from the - // input value (i.e. we are converting to OnceAction). - explicit Impl(R&& input_value) - : state_(new State(std::move(input_value))) {} - - // The constructor used when the return value is not allowed to move from - // the input value (i.e. we are converting to Action). - explicit Impl(const R& input_value) : state_(new State(input_value)) {} - - U operator()() && { return std::move(state_->value); } - U operator()() const& { return state_->value; } - - private: - // We put our state on the heap so that the compiler-generated copy/move - // constructors work correctly even when U is a reference-like type. 
This is - // necessary only because we eagerly create State::value (see the note on - // that symbol for details). If we instead had only the input value as a - // member then the default constructors would work fine. - // - // For example, when R is std::string and U is std::string_view, value is a - // reference to the string backed by input_value. The copy constructor would - // copy both, so that we wind up with a new input_value object (with the - // same contents) and a reference to the *old* input_value object rather - // than the new one. - struct State { - explicit State(const R& input_value_in) - : input_value(input_value_in), - // Make an implicit conversion to Result before initializing the U - // object we store, avoiding calling any explicit constructor of U - // from R. - // - // This simulates the language rules: a function with return type U - // that does `return R()` requires R to be implicitly convertible to - // U, and uses that path for the conversion, even U Result has an - // explicit constructor from R. - value(ImplicitCast_(internal::as_const(input_value))) {} - - // As above, but for the case where we're moving from the ReturnAction - // object because it's being used as a OnceAction. - explicit State(R&& input_value_in) - : input_value(std::move(input_value_in)), - // For the same reason as above we make an implicit conversion to U - // before initializing the value. - // - // Unlike above we provide the input value as an rvalue to the - // implicit conversion because this is a OnceAction: it's fine if it - // wants to consume the input value. - value(ImplicitCast_(std::move(input_value))) {} - - // A copy of the value originally provided by the user. We retain this in - // addition to the value of the mock function's result type below in case - // the latter is a reference-like type. See the std::string_view example - // in the documentation on Return. - R input_value; - - // The value we actually return, as the type returned by the mock function - // itself. - // - // We eagerly initialize this here, rather than lazily doing the implicit - // conversion automatically each time Perform is called, for historical - // reasons: in 2009-11, commit a070cbd91c (Google changelist 13540126) - // made the Action conversion operator eagerly convert the R value to - // U, but without keeping the R alive. This broke the use case discussed - // in the documentation for Return, making reference-like types such as - // std::string_view not safe to use as U where the input type R is a - // value-like type such as std::string. - // - // The example the commit gave was not very clear, nor was the issue - // thread (https://github.com/google/googlemock/issues/86), but it seems - // the worry was about reference-like input types R that flatten to a - // value-like type U when being implicitly converted. An example of this - // is std::vector::reference, which is often a proxy type with an - // reference to the underlying vector: - // - // // Helper method: have the mock function return bools according - // // to the supplied script. - // void SetActions(MockFunction& mock, - // const std::vector& script) { - // for (size_t i = 0; i < script.size(); ++i) { - // EXPECT_CALL(mock, Call(i)).WillOnce(Return(script[i])); - // } - // } - // - // TEST(Foo, Bar) { - // // Set actions using a temporary vector, whose operator[] - // // returns proxy objects that references that will be - // // dangling once the call to SetActions finishes and the - // // vector is destroyed. 
- // MockFunction mock; - // SetActions(mock, {false, true}); - // - // EXPECT_FALSE(mock.AsStdFunction()(0)); - // EXPECT_TRUE(mock.AsStdFunction()(1)); - // } - // - // This eager conversion helps with a simple case like this, but doesn't - // fully make these types work in general. For example the following still - // uses a dangling reference: - // - // TEST(Foo, Baz) { - // MockFunction()> mock; - // - // // Return the same vector twice, and then the empty vector - // // thereafter. - // auto action = Return(std::initializer_list{ - // "taco", "burrito", - // }); - // - // EXPECT_CALL(mock, Call) - // .WillOnce(action) - // .WillOnce(action) - // .WillRepeatedly(Return(std::vector{})); - // - // EXPECT_THAT(mock.AsStdFunction()(), - // ElementsAre("taco", "burrito")); - // EXPECT_THAT(mock.AsStdFunction()(), - // ElementsAre("taco", "burrito")); - // EXPECT_THAT(mock.AsStdFunction()(), IsEmpty()); - // } - // - U value; - }; - - const std::shared_ptr state_; - }; - - R value_; -}; - -// A specialization of ReturnAction when R is ByMoveWrapper for some T. -// -// This version applies the type system-defeating hack of moving from T even in -// the const call operator, checking at runtime that it isn't called more than -// once, since the user has declared their intent to do so by using ByMove. -template -class ReturnAction> final { - public: - explicit ReturnAction(ByMoveWrapper wrapper) - : state_(new State(std::move(wrapper.payload))) {} - - T operator()() const { - GTEST_CHECK_(!state_->called) - << "A ByMove() action must be performed at most once."; - - state_->called = true; - return std::move(state_->value); - } - - private: - // We store our state on the heap so that we are copyable as required by - // Action, despite the fact that we are stateful and T may not be copyable. - struct State { - explicit State(T&& value_in) : value(std::move(value_in)) {} - - T value; - bool called = false; - }; - - const std::shared_ptr state_; -}; - -// Implements the ReturnNull() action. -class ReturnNullAction { - public: - // Allows ReturnNull() to be used in any pointer-returning function. In C++11 - // this is enforced by returning nullptr, and in non-C++11 by asserting a - // pointer type on compile time. - template - static Result Perform(const ArgumentTuple&) { - return nullptr; - } -}; - -// Implements the Return() action. -class ReturnVoidAction { - public: - // Allows Return() to be used in any void-returning function. - template - static void Perform(const ArgumentTuple&) { - static_assert(std::is_void::value, "Result should be void."); - } -}; - -// Implements the polymorphic ReturnRef(x) action, which can be used -// in any function that returns a reference to the type of x, -// regardless of the argument types. -template -class ReturnRefAction { - public: - // Constructs a ReturnRefAction object from the reference to be returned. - explicit ReturnRefAction(T& ref) : ref_(ref) {} // NOLINT - - // This template type conversion operator allows ReturnRef(x) to be - // used in ANY function that returns a reference to x's type. - template - operator Action() const { - typedef typename Function::Result Result; - // Asserts that the function return type is a reference. This - // catches the user error of using ReturnRef(x) when Return(x) - // should be used, and generates some helpful error message. 
- static_assert(std::is_reference::value, - "use Return instead of ReturnRef to return a value"); - return Action(new Impl(ref_)); - } - - private: - // Implements the ReturnRef(x) action for a particular function type F. - template - class Impl : public ActionInterface { - public: - typedef typename Function::Result Result; - typedef typename Function::ArgumentTuple ArgumentTuple; - - explicit Impl(T& ref) : ref_(ref) {} // NOLINT - - Result Perform(const ArgumentTuple&) override { return ref_; } - - private: - T& ref_; - }; - - T& ref_; -}; - -// Implements the polymorphic ReturnRefOfCopy(x) action, which can be -// used in any function that returns a reference to the type of x, -// regardless of the argument types. -template -class ReturnRefOfCopyAction { - public: - // Constructs a ReturnRefOfCopyAction object from the reference to - // be returned. - explicit ReturnRefOfCopyAction(const T& value) : value_(value) {} // NOLINT - - // This template type conversion operator allows ReturnRefOfCopy(x) to be - // used in ANY function that returns a reference to x's type. - template - operator Action() const { - typedef typename Function::Result Result; - // Asserts that the function return type is a reference. This - // catches the user error of using ReturnRefOfCopy(x) when Return(x) - // should be used, and generates some helpful error message. - static_assert(std::is_reference::value, - "use Return instead of ReturnRefOfCopy to return a value"); - return Action(new Impl(value_)); - } - - private: - // Implements the ReturnRefOfCopy(x) action for a particular function type F. - template - class Impl : public ActionInterface { - public: - typedef typename Function::Result Result; - typedef typename Function::ArgumentTuple ArgumentTuple; - - explicit Impl(const T& value) : value_(value) {} // NOLINT - - Result Perform(const ArgumentTuple&) override { return value_; } - - private: - T value_; - }; - - const T value_; -}; - -// Implements the polymorphic ReturnRoundRobin(v) action, which can be -// used in any function that returns the element_type of v. -template -class ReturnRoundRobinAction { - public: - explicit ReturnRoundRobinAction(std::vector values) { - GTEST_CHECK_(!values.empty()) - << "ReturnRoundRobin requires at least one element."; - state_->values = std::move(values); - } - - template - T operator()(Args&&...) const { - return state_->Next(); - } - - private: - struct State { - T Next() { - T ret_val = values[i++]; - if (i == values.size()) i = 0; - return ret_val; - } - - std::vector values; - size_t i = 0; - }; - std::shared_ptr state_ = std::make_shared(); -}; - -// Implements the polymorphic DoDefault() action. -class DoDefaultAction { - public: - // This template type conversion operator allows DoDefault() to be - // used in any function. - template - operator Action() const { - return Action(); - } // NOLINT -}; - -// Implements the Assign action to set a given pointer referent to a -// particular value. -template -class AssignAction { - public: - AssignAction(T1* ptr, T2 value) : ptr_(ptr), value_(value) {} - - template - void Perform(const ArgumentTuple& /* args */) const { - *ptr_ = value_; - } - - private: - T1* const ptr_; - const T2 value_; -}; - -#if !GTEST_OS_WINDOWS_MOBILE - -// Implements the SetErrnoAndReturn action to simulate return from -// various system calls and libc functions. 
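As a usage sketch of the action described above (the FileApi and MockFile types are hypothetical and exist only for this example), SetErrnoAndReturn lets a mocked POSIX-style call report failure the way the real function would:

```cpp
#include <cerrno>

#include "gmock/gmock.h"

// A hypothetical interface and mock, used only to illustrate the action.
class FileApi {
 public:
  virtual ~FileApi() = default;
  virtual int Open(const char* path) = 0;
};

class MockFile : public FileApi {
 public:
  MOCK_METHOD(int, Open, (const char* path), (override));
};

void ExpectMissingFile(MockFile& mock) {
  using ::testing::_;
  using ::testing::SetErrnoAndReturn;
  // The mocked Open() sets errno to ENOENT and returns -1, mimicking libc.
  EXPECT_CALL(mock, Open(_)).WillOnce(SetErrnoAndReturn(ENOENT, -1));
}
```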
-template -class SetErrnoAndReturnAction { - public: - SetErrnoAndReturnAction(int errno_value, T result) - : errno_(errno_value), result_(result) {} - template - Result Perform(const ArgumentTuple& /* args */) const { - errno = errno_; - return result_; - } - - private: - const int errno_; - const T result_; -}; - -#endif // !GTEST_OS_WINDOWS_MOBILE - -// Implements the SetArgumentPointee(x) action for any function -// whose N-th argument (0-based) is a pointer to x's type. -template -struct SetArgumentPointeeAction { - A value; - - template - void operator()(const Args&... args) const { - *::std::get(std::tie(args...)) = value; - } -}; - -// Implements the Invoke(object_ptr, &Class::Method) action. -template -struct InvokeMethodAction { - Class* const obj_ptr; - const MethodPtr method_ptr; - - template - auto operator()(Args&&... args) const - -> decltype((obj_ptr->*method_ptr)(std::forward(args)...)) { - return (obj_ptr->*method_ptr)(std::forward(args)...); - } -}; - -// Implements the InvokeWithoutArgs(f) action. The template argument -// FunctionImpl is the implementation type of f, which can be either a -// function pointer or a functor. InvokeWithoutArgs(f) can be used as an -// Action as long as f's type is compatible with F. -template -struct InvokeWithoutArgsAction { - FunctionImpl function_impl; - - // Allows InvokeWithoutArgs(f) to be used as any action whose type is - // compatible with f. - template - auto operator()(const Args&...) -> decltype(function_impl()) { - return function_impl(); - } -}; - -// Implements the InvokeWithoutArgs(object_ptr, &Class::Method) action. -template -struct InvokeMethodWithoutArgsAction { - Class* const obj_ptr; - const MethodPtr method_ptr; - - using ReturnType = - decltype((std::declval()->*std::declval())()); - - template - ReturnType operator()(const Args&...) const { - return (obj_ptr->*method_ptr)(); - } -}; - -// Implements the IgnoreResult(action) action. -template -class IgnoreResultAction { - public: - explicit IgnoreResultAction(const A& action) : action_(action) {} - - template - operator Action() const { - // Assert statement belongs here because this is the best place to verify - // conditions on F. It produces the clearest error messages - // in most compilers. - // Impl really belongs in this scope as a local class but can't - // because MSVC produces duplicate symbols in different translation units - // in this case. Until MS fixes that bug we put Impl into the class scope - // and put the typedef both here (for use in assert statement) and - // in the Impl class. But both definitions must be the same. - typedef typename internal::Function::Result Result; - - // Asserts at compile time that F returns void. - static_assert(std::is_void::value, "Result type should be void."); - - return Action(new Impl(action_)); - } - - private: - template - class Impl : public ActionInterface { - public: - typedef typename internal::Function::Result Result; - typedef typename internal::Function::ArgumentTuple ArgumentTuple; - - explicit Impl(const A& action) : action_(action) {} - - void Perform(const ArgumentTuple& args) override { - // Performs the action and ignores its result. - action_.Perform(args); - } - - private: - // Type OriginalFunction is the same as F except that its return - // type is IgnoredValue. 
- typedef - typename internal::Function::MakeResultIgnoredValue OriginalFunction; - - const Action action_; - }; - - const A action_; -}; - -template -struct WithArgsAction { - InnerAction inner_action; - - // The signature of the function as seen by the inner action, given an out - // action with the given result and argument types. - template - using InnerSignature = - R(typename std::tuple_element>::type...); - - // Rather than a call operator, we must define conversion operators to - // particular action types. This is necessary for embedded actions like - // DoDefault(), which rely on an action conversion operators rather than - // providing a call operator because even with a particular set of arguments - // they don't have a fixed return type. - - template >...)>>::value, - int>::type = 0> - operator OnceAction() && { // NOLINT - struct OA { - OnceAction> inner_action; - - R operator()(Args&&... args) && { - return std::move(inner_action) - .Call(std::get( - std::forward_as_tuple(std::forward(args)...))...); - } - }; - - return OA{std::move(inner_action)}; - } - - template >...)>>::value, - int>::type = 0> - operator Action() const { // NOLINT - Action> converted(inner_action); - - return [converted](Args&&... args) -> R { - return converted.Perform(std::forward_as_tuple( - std::get(std::forward_as_tuple(std::forward(args)...))...)); - }; - } -}; - -template -class DoAllAction; - -// Base case: only a single action. -template -class DoAllAction { - public: - struct UserConstructorTag {}; - - template - explicit DoAllAction(UserConstructorTag, T&& action) - : final_action_(std::forward(action)) {} - - // Rather than a call operator, we must define conversion operators to - // particular action types. This is necessary for embedded actions like - // DoDefault(), which rely on an action conversion operators rather than - // providing a call operator because even with a particular set of arguments - // they don't have a fixed return type. - - template >::value, - int>::type = 0> - operator OnceAction() && { // NOLINT - return std::move(final_action_); - } - - template < - typename R, typename... Args, - typename std::enable_if< - std::is_convertible>::value, - int>::type = 0> - operator Action() const { // NOLINT - return final_action_; - } - - private: - FinalAction final_action_; -}; - -// Recursive case: support N actions by calling the initial action and then -// calling through to the base class containing N-1 actions. -template -class DoAllAction - : private DoAllAction { - private: - using Base = DoAllAction; - - // The type of reference that should be provided to an initial action for a - // mocked function parameter of type T. - // - // There are two quirks here: - // - // * Unlike most forwarding functions, we pass scalars through by value. - // This isn't strictly necessary because an lvalue reference would work - // fine too and be consistent with other non-reference types, but it's - // perhaps less surprising. - // - // For example if the mocked function has signature void(int), then it - // might seem surprising for the user's initial action to need to be - // convertible to Action. This is perhaps less - // surprising for a non-scalar type where there may be a performance - // impact, or it might even be impossible, to pass by value. - // - // * More surprisingly, `const T&` is often not a const reference type. - // By the reference collapsing rules in C++17 [dcl.ref]/6, if T refers to - // U& or U&& for some non-scalar type U, then InitialActionArgType is - // U&. 
In other words, we may hand over a non-const reference. - // - // So for example, given some non-scalar type Obj we have the following - // mappings: - // - // T InitialActionArgType - // ------- ----------------------- - // Obj const Obj& - // Obj& Obj& - // Obj&& Obj& - // const Obj const Obj& - // const Obj& const Obj& - // const Obj&& const Obj& - // - // In other words, the initial actions get a mutable view of an non-scalar - // argument if and only if the mock function itself accepts a non-const - // reference type. They are never given an rvalue reference to an - // non-scalar type. - // - // This situation makes sense if you imagine use with a matcher that is - // designed to write through a reference. For example, if the caller wants - // to fill in a reference argument and then return a canned value: - // - // EXPECT_CALL(mock, Call) - // .WillOnce(DoAll(SetArgReferee<0>(17), Return(19))); - // - template - using InitialActionArgType = - typename std::conditional::value, T, const T&>::type; - - public: - struct UserConstructorTag {}; - - template - explicit DoAllAction(UserConstructorTag, T&& initial_action, - U&&... other_actions) - : Base({}, std::forward(other_actions)...), - initial_action_(std::forward(initial_action)) {} - - template ...)>>, - std::is_convertible>>::value, - int>::type = 0> - operator OnceAction() && { // NOLINT - // Return an action that first calls the initial action with arguments - // filtered through InitialActionArgType, then forwards arguments directly - // to the base class to deal with the remaining actions. - struct OA { - OnceAction...)> initial_action; - OnceAction remaining_actions; - - R operator()(Args... args) && { - std::move(initial_action) - .Call(static_cast>(args)...); - - return std::move(remaining_actions).Call(std::forward(args)...); - } - }; - - return OA{ - std::move(initial_action_), - std::move(static_cast(*this)), - }; - } - - template < - typename R, typename... Args, - typename std::enable_if< - conjunction< - // Both the initial action and the rest must support conversion to - // Action. - std::is_convertible...)>>, - std::is_convertible>>::value, - int>::type = 0> - operator Action() const { // NOLINT - // Return an action that first calls the initial action with arguments - // filtered through InitialActionArgType, then forwards arguments directly - // to the base class to deal with the remaining actions. - struct OA { - Action...)> initial_action; - Action remaining_actions; - - R operator()(Args... args) const { - initial_action.Perform(std::forward_as_tuple( - static_cast>(args)...)); - - return remaining_actions.Perform( - std::forward_as_tuple(std::forward(args)...)); - } - }; - - return OA{ - initial_action_, - static_cast(*this), - }; - } - - private: - InitialAction initial_action_; -}; - -template -struct ReturnNewAction { - T* operator()() const { - return internal::Apply( - [](const Params&... unpacked_params) { - return new T(unpacked_params...); - }, - params); - } - std::tuple params; -}; - -template -struct ReturnArgAction { - template ::type> - auto operator()(Args&&... args) const -> decltype(std::get( - std::forward_as_tuple(std::forward(args)...))) { - return std::get(std::forward_as_tuple(std::forward(args)...)); - } -}; - -template -struct SaveArgAction { - Ptr pointer; - - template - void operator()(const Args&... args) const { - *pointer = std::get(std::tie(args...)); - } -}; - -template -struct SaveArgPointeeAction { - Ptr pointer; - - template - void operator()(const Args&... 
args) const { - *pointer = *std::get(std::tie(args...)); - } -}; - -template -struct SetArgRefereeAction { - T value; - - template - void operator()(Args&&... args) const { - using argk_type = - typename ::std::tuple_element>::type; - static_assert(std::is_lvalue_reference::value, - "Argument must be a reference type."); - std::get(std::tie(args...)) = value; - } -}; - -template -struct SetArrayArgumentAction { - I1 first; - I2 last; - - template - void operator()(const Args&... args) const { - auto value = std::get(std::tie(args...)); - for (auto it = first; it != last; ++it, (void)++value) { - *value = *it; - } - } -}; - -template -struct DeleteArgAction { - template - void operator()(const Args&... args) const { - delete std::get(std::tie(args...)); - } -}; - -template -struct ReturnPointeeAction { - Ptr pointer; - template - auto operator()(const Args&...) const -> decltype(*pointer) { - return *pointer; - } -}; - -#if GTEST_HAS_EXCEPTIONS -template -struct ThrowAction { - T exception; - // We use a conversion operator to adapt to any return type. - template - operator Action() const { // NOLINT - T copy = exception; - return [copy](Args...) -> R { throw copy; }; - } -}; -#endif // GTEST_HAS_EXCEPTIONS - -} // namespace internal - -// An Unused object can be implicitly constructed from ANY value. -// This is handy when defining actions that ignore some or all of the -// mock function arguments. For example, given -// -// MOCK_METHOD3(Foo, double(const string& label, double x, double y)); -// MOCK_METHOD3(Bar, double(int index, double x, double y)); -// -// instead of -// -// double DistanceToOriginWithLabel(const string& label, double x, double y) { -// return sqrt(x*x + y*y); -// } -// double DistanceToOriginWithIndex(int index, double x, double y) { -// return sqrt(x*x + y*y); -// } -// ... -// EXPECT_CALL(mock, Foo("abc", _, _)) -// .WillOnce(Invoke(DistanceToOriginWithLabel)); -// EXPECT_CALL(mock, Bar(5, _, _)) -// .WillOnce(Invoke(DistanceToOriginWithIndex)); -// -// you could write -// -// // We can declare any uninteresting argument as Unused. -// double DistanceToOrigin(Unused, double x, double y) { -// return sqrt(x*x + y*y); -// } -// ... -// EXPECT_CALL(mock, Foo("abc", _, _)).WillOnce(Invoke(DistanceToOrigin)); -// EXPECT_CALL(mock, Bar(5, _, _)).WillOnce(Invoke(DistanceToOrigin)); -typedef internal::IgnoredValue Unused; - -// Creates an action that does actions a1, a2, ..., sequentially in -// each invocation. All but the last action will have a readonly view of the -// arguments. -template -internal::DoAllAction::type...> DoAll( - Action&&... action) { - return internal::DoAllAction::type...>( - {}, std::forward(action)...); -} - -// WithArg(an_action) creates an action that passes the k-th -// (0-based) argument of the mock function to an_action and performs -// it. It adapts an action accepting one argument to one that accepts -// multiple arguments. For convenience, we also provide -// WithArgs(an_action) (defined below) as a synonym. -template -internal::WithArgsAction::type, k> WithArg( - InnerAction&& action) { - return {std::forward(action)}; -} - -// WithArgs(an_action) creates an action that passes -// the selected arguments of the mock function to an_action and -// performs it. It serves as an adaptor between actions with -// different argument lists. 
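A brief usage sketch of the adaptor just described (Sum, Calculator and MockCalculator are names invented for the example): WithArgs<0, 2> forwards only the first and third mock arguments to the inner action, so a two-argument callable can serve a three-argument mock function:

```cpp
#include "gmock/gmock.h"

int Sum(int a, int b) { return a + b; }

// Hypothetical interface and mock used only for illustration.
class Calculator {
 public:
  virtual ~Calculator() = default;
  virtual int Combine(int x, const char* label, int y) = 0;
};

class MockCalculator : public Calculator {
 public:
  MOCK_METHOD(int, Combine, (int x, const char* label, int y), (override));
};

void ExpectSumIgnoringLabel(MockCalculator& mock) {
  using ::testing::_;
  using ::testing::WithArgs;
  // Arguments 0 and 2 reach Sum(); the label argument is dropped.
  EXPECT_CALL(mock, Combine(_, _, _)).WillOnce(WithArgs<0, 2>(Sum));
}
```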
-template -internal::WithArgsAction::type, k, ks...> -WithArgs(InnerAction&& action) { - return {std::forward(action)}; -} - -// WithoutArgs(inner_action) can be used in a mock function with a -// non-empty argument list to perform inner_action, which takes no -// argument. In other words, it adapts an action accepting no -// argument to one that accepts (and ignores) arguments. -template -internal::WithArgsAction::type> WithoutArgs( - InnerAction&& action) { - return {std::forward(action)}; -} - -// Creates an action that returns a value. -// -// The returned type can be used with a mock function returning a non-void, -// non-reference type U as follows: -// -// * If R is convertible to U and U is move-constructible, then the action can -// be used with WillOnce. -// -// * If const R& is convertible to U and U is copy-constructible, then the -// action can be used with both WillOnce and WillRepeatedly. -// -// The mock expectation contains the R value from which the U return value is -// constructed (a move/copy of the argument to Return). This means that the R -// value will survive at least until the mock object's expectations are cleared -// or the mock object is destroyed, meaning that U can safely be a -// reference-like type such as std::string_view: -// -// // The mock function returns a view of a copy of the string fed to -// // Return. The view is valid even after the action is performed. -// MockFunction mock; -// EXPECT_CALL(mock, Call).WillOnce(Return(std::string("taco"))); -// const std::string_view result = mock.AsStdFunction()(); -// EXPECT_EQ("taco", result); -// -template -internal::ReturnAction Return(R value) { - return internal::ReturnAction(std::move(value)); -} - -// Creates an action that returns NULL. -inline PolymorphicAction ReturnNull() { - return MakePolymorphicAction(internal::ReturnNullAction()); -} - -// Creates an action that returns from a void function. -inline PolymorphicAction Return() { - return MakePolymorphicAction(internal::ReturnVoidAction()); -} - -// Creates an action that returns the reference to a variable. -template -inline internal::ReturnRefAction ReturnRef(R& x) { // NOLINT - return internal::ReturnRefAction(x); -} - -// Prevent using ReturnRef on reference to temporary. -template -internal::ReturnRefAction ReturnRef(R&&) = delete; - -// Creates an action that returns the reference to a copy of the -// argument. The copy is created when the action is constructed and -// lives as long as the action. -template -inline internal::ReturnRefOfCopyAction ReturnRefOfCopy(const R& x) { - return internal::ReturnRefOfCopyAction(x); -} - -// DEPRECATED: use Return(x) directly with WillOnce. -// -// Modifies the parent action (a Return() action) to perform a move of the -// argument instead of a copy. -// Return(ByMove()) actions can only be executed once and will assert this -// invariant. -template -internal::ByMoveWrapper ByMove(R x) { - return internal::ByMoveWrapper(std::move(x)); -} - -// Creates an action that returns an element of `vals`. Calling this action will -// repeatedly return the next value from `vals` until it reaches the end and -// will restart from the beginning. -template -internal::ReturnRoundRobinAction ReturnRoundRobin(std::vector vals) { - return internal::ReturnRoundRobinAction(std::move(vals)); -} - -// Creates an action that returns an element of `vals`. Calling this action will -// repeatedly return the next value from `vals` until it reaches the end and -// will restart from the beginning. 
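For illustration (Counter and MockCounter are invented for this sketch), the round-robin behaviour described above looks like this in a test body; successive calls yield 1, 2, 3 and then wrap back to 1:

```cpp
#include "gmock/gmock.h"

// Hypothetical interface and mock used only for illustration.
class Counter {
 public:
  virtual ~Counter() = default;
  virtual int Next() = 0;
};

class MockCounter : public Counter {
 public:
  MOCK_METHOD(int, Next, (), (override));
};

void ExpectCycle(MockCounter& mock) {
  using ::testing::ReturnRoundRobin;
  // Each call returns the next element; after the last one it starts over.
  EXPECT_CALL(mock, Next()).WillRepeatedly(ReturnRoundRobin<int>({1, 2, 3}));
}
```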
-template -internal::ReturnRoundRobinAction ReturnRoundRobin( - std::initializer_list vals) { - return internal::ReturnRoundRobinAction(std::vector(vals)); -} - -// Creates an action that does the default action for the give mock function. -inline internal::DoDefaultAction DoDefault() { - return internal::DoDefaultAction(); -} - -// Creates an action that sets the variable pointed by the N-th -// (0-based) function argument to 'value'. -template -internal::SetArgumentPointeeAction SetArgPointee(T value) { - return {std::move(value)}; -} - -// The following version is DEPRECATED. -template -internal::SetArgumentPointeeAction SetArgumentPointee(T value) { - return {std::move(value)}; -} - -// Creates an action that sets a pointer referent to a given value. -template -PolymorphicAction> Assign(T1* ptr, T2 val) { - return MakePolymorphicAction(internal::AssignAction(ptr, val)); -} - -#if !GTEST_OS_WINDOWS_MOBILE - -// Creates an action that sets errno and returns the appropriate error. -template -PolymorphicAction> SetErrnoAndReturn( - int errval, T result) { - return MakePolymorphicAction( - internal::SetErrnoAndReturnAction(errval, result)); -} - -#endif // !GTEST_OS_WINDOWS_MOBILE - -// Various overloads for Invoke(). - -// Legacy function. -// Actions can now be implicitly constructed from callables. No need to create -// wrapper objects. -// This function exists for backwards compatibility. -template -typename std::decay::type Invoke(FunctionImpl&& function_impl) { - return std::forward(function_impl); -} - -// Creates an action that invokes the given method on the given object -// with the mock function's arguments. -template -internal::InvokeMethodAction Invoke(Class* obj_ptr, - MethodPtr method_ptr) { - return {obj_ptr, method_ptr}; -} - -// Creates an action that invokes 'function_impl' with no argument. -template -internal::InvokeWithoutArgsAction::type> -InvokeWithoutArgs(FunctionImpl function_impl) { - return {std::move(function_impl)}; -} - -// Creates an action that invokes the given method on the given object -// with no argument. -template -internal::InvokeMethodWithoutArgsAction InvokeWithoutArgs( - Class* obj_ptr, MethodPtr method_ptr) { - return {obj_ptr, method_ptr}; -} - -// Creates an action that performs an_action and throws away its -// result. In other words, it changes the return type of an_action to -// void. an_action MUST NOT return void, or the code won't compile. -template -inline internal::IgnoreResultAction IgnoreResult(const A& an_action) { - return internal::IgnoreResultAction(an_action); -} - -// Creates a reference wrapper for the given L-value. If necessary, -// you can explicitly specify the type of the reference. For example, -// suppose 'derived' is an object of type Derived, ByRef(derived) -// would wrap a Derived&. If you want to wrap a const Base& instead, -// where Base is a base class of Derived, just write: -// -// ByRef(derived) -// -// N.B. ByRef is redundant with std::ref, std::cref and std::reference_wrapper. -// However, it may still be used for consistency with ByMove(). -template -inline ::std::reference_wrapper ByRef(T& l_value) { // NOLINT - return ::std::reference_wrapper(l_value); -} - -// The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new -// instance of type T, constructed on the heap with constructor arguments -// a1, a2, ..., and a_k. The caller assumes ownership of the returned value. -template -internal::ReturnNewAction::type...> ReturnNew( - Params&&... 
params) { - return {std::forward_as_tuple(std::forward(params)...)}; -} - -// Action ReturnArg() returns the k-th argument of the mock function. -template -internal::ReturnArgAction ReturnArg() { - return {}; -} - -// Action SaveArg(pointer) saves the k-th (0-based) argument of the -// mock function to *pointer. -template -internal::SaveArgAction SaveArg(Ptr pointer) { - return {pointer}; -} - -// Action SaveArgPointee(pointer) saves the value pointed to -// by the k-th (0-based) argument of the mock function to *pointer. -template -internal::SaveArgPointeeAction SaveArgPointee(Ptr pointer) { - return {pointer}; -} - -// Action SetArgReferee(value) assigns 'value' to the variable -// referenced by the k-th (0-based) argument of the mock function. -template -internal::SetArgRefereeAction::type> SetArgReferee( - T&& value) { - return {std::forward(value)}; -} - -// Action SetArrayArgument(first, last) copies the elements in -// source range [first, last) to the array pointed to by the k-th -// (0-based) argument, which can be either a pointer or an -// iterator. The action does not take ownership of the elements in the -// source range. -template -internal::SetArrayArgumentAction SetArrayArgument(I1 first, - I2 last) { - return {first, last}; -} - -// Action DeleteArg() deletes the k-th (0-based) argument of the mock -// function. -template -internal::DeleteArgAction DeleteArg() { - return {}; -} - -// This action returns the value pointed to by 'pointer'. -template -internal::ReturnPointeeAction ReturnPointee(Ptr pointer) { - return {pointer}; -} - -// Action Throw(exception) can be used in a mock function of any type -// to throw the given exception. Any copyable value can be thrown. -#if GTEST_HAS_EXCEPTIONS -template -internal::ThrowAction::type> Throw(T&& exception) { - return {std::forward(exception)}; -} -#endif // GTEST_HAS_EXCEPTIONS - -namespace internal { - -// A macro from the ACTION* family (defined later in gmock-generated-actions.h) -// defines an action that can be used in a mock function. Typically, -// these actions only care about a subset of the arguments of the mock -// function. For example, if such an action only uses the second -// argument, it can be used in any mock function that takes >= 2 -// arguments where the type of the second argument is compatible. -// -// Therefore, the action implementation must be prepared to take more -// arguments than it needs. The ExcessiveArg type is used to -// represent those excessive arguments. In order to keep the compiler -// error messages tractable, we define it in the testing namespace -// instead of testing::internal. However, this is an INTERNAL TYPE -// and subject to change without notice, so a user MUST NOT USE THIS -// TYPE DIRECTLY. -struct ExcessiveArg {}; - -// Builds an implementation of an Action<> for some particular signature, using -// a class defined by an ACTION* macro. -template -struct ActionImpl; - -template -struct ImplBase { - struct Holder { - // Allows each copy of the Action<> to get to the Impl. - explicit operator const Impl&() const { return *ptr; } - std::shared_ptr ptr; - }; - using type = typename std::conditional::value, - Impl, Holder>::type; -}; - -template -struct ActionImpl : ImplBase::type { - using Base = typename ImplBase::type; - using function_type = R(Args...); - using args_type = std::tuple; - - ActionImpl() = default; // Only defined if appropriate for Base. - explicit ActionImpl(std::shared_ptr impl) : Base{std::move(impl)} {} - - R operator()(Args&&... 
arg) const { - static constexpr size_t kMaxArgs = - sizeof...(Args) <= 10 ? sizeof...(Args) : 10; - return Apply(MakeIndexSequence{}, - MakeIndexSequence<10 - kMaxArgs>{}, - args_type{std::forward(arg)...}); - } - - template - R Apply(IndexSequence, IndexSequence, - const args_type& args) const { - // Impl need not be specific to the signature of action being implemented; - // only the implementing function body needs to have all of the specific - // types instantiated. Up to 10 of the args that are provided by the - // args_type get passed, followed by a dummy of unspecified type for the - // remainder up to 10 explicit args. - static constexpr ExcessiveArg kExcessArg{}; - return static_cast(*this) - .template gmock_PerformImpl< - /*function_type=*/function_type, /*return_type=*/R, - /*args_type=*/args_type, - /*argN_type=*/ - typename std::tuple_element::type...>( - /*args=*/args, std::get(args)..., - ((void)excess_id, kExcessArg)...); - } -}; - -// Stores a default-constructed Impl as part of the Action<>'s -// std::function<>. The Impl should be trivial to copy. -template -::testing::Action MakeAction() { - return ::testing::Action(ActionImpl()); -} - -// Stores just the one given instance of Impl. -template -::testing::Action MakeAction(std::shared_ptr impl) { - return ::testing::Action(ActionImpl(std::move(impl))); -} - -#define GMOCK_INTERNAL_ARG_UNUSED(i, data, el) \ - , const arg##i##_type& arg##i GTEST_ATTRIBUTE_UNUSED_ -#define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_ \ - const args_type& args GTEST_ATTRIBUTE_UNUSED_ GMOCK_PP_REPEAT( \ - GMOCK_INTERNAL_ARG_UNUSED, , 10) - -#define GMOCK_INTERNAL_ARG(i, data, el) , const arg##i##_type& arg##i -#define GMOCK_ACTION_ARG_TYPES_AND_NAMES_ \ - const args_type& args GMOCK_PP_REPEAT(GMOCK_INTERNAL_ARG, , 10) - -#define GMOCK_INTERNAL_TEMPLATE_ARG(i, data, el) , typename arg##i##_type -#define GMOCK_ACTION_TEMPLATE_ARGS_NAMES_ \ - GMOCK_PP_TAIL(GMOCK_PP_REPEAT(GMOCK_INTERNAL_TEMPLATE_ARG, , 10)) - -#define GMOCK_INTERNAL_TYPENAME_PARAM(i, data, param) , typename param##_type -#define GMOCK_ACTION_TYPENAME_PARAMS_(params) \ - GMOCK_PP_TAIL(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_TYPENAME_PARAM, , params)) - -#define GMOCK_INTERNAL_TYPE_PARAM(i, data, param) , param##_type -#define GMOCK_ACTION_TYPE_PARAMS_(params) \ - GMOCK_PP_TAIL(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_TYPE_PARAM, , params)) - -#define GMOCK_INTERNAL_TYPE_GVALUE_PARAM(i, data, param) \ - , param##_type gmock_p##i -#define GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params) \ - GMOCK_PP_TAIL(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_TYPE_GVALUE_PARAM, , params)) - -#define GMOCK_INTERNAL_GVALUE_PARAM(i, data, param) \ - , std::forward(gmock_p##i) -#define GMOCK_ACTION_GVALUE_PARAMS_(params) \ - GMOCK_PP_TAIL(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GVALUE_PARAM, , params)) - -#define GMOCK_INTERNAL_INIT_PARAM(i, data, param) \ - , param(::std::forward(gmock_p##i)) -#define GMOCK_ACTION_INIT_PARAMS_(params) \ - GMOCK_PP_TAIL(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_INIT_PARAM, , params)) - -#define GMOCK_INTERNAL_FIELD_PARAM(i, data, param) param##_type param; -#define GMOCK_ACTION_FIELD_PARAMS_(params) \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_FIELD_PARAM, , params) - -#define GMOCK_INTERNAL_ACTION(name, full_name, params) \ - template \ - class full_name { \ - public: \ - explicit full_name(GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) \ - : impl_(std::make_shared( \ - GMOCK_ACTION_GVALUE_PARAMS_(params))) {} \ - full_name(const full_name&) = default; \ - full_name(full_name&&) noexcept = default; \ - template \ - operator 
::testing::Action() const { \ - return ::testing::internal::MakeAction(impl_); \ - } \ - \ - private: \ - class gmock_Impl { \ - public: \ - explicit gmock_Impl(GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) \ - : GMOCK_ACTION_INIT_PARAMS_(params) {} \ - template \ - return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const; \ - GMOCK_ACTION_FIELD_PARAMS_(params) \ - }; \ - std::shared_ptr impl_; \ - }; \ - template \ - inline full_name name( \ - GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) GTEST_MUST_USE_RESULT_; \ - template \ - inline full_name name( \ - GMOCK_ACTION_TYPE_GVALUE_PARAMS_(params)) { \ - return full_name( \ - GMOCK_ACTION_GVALUE_PARAMS_(params)); \ - } \ - template \ - template \ - return_type \ - full_name::gmock_Impl::gmock_PerformImpl( \ - GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const - -} // namespace internal - -// Similar to GMOCK_INTERNAL_ACTION, but no bound parameters are stored. -#define ACTION(name) \ - class name##Action { \ - public: \ - explicit name##Action() noexcept {} \ - name##Action(const name##Action&) noexcept {} \ - template \ - operator ::testing::Action() const { \ - return ::testing::internal::MakeAction(); \ - } \ - \ - private: \ - class gmock_Impl { \ - public: \ - template \ - return_type gmock_PerformImpl(GMOCK_ACTION_ARG_TYPES_AND_NAMES_) const; \ - }; \ - }; \ - inline name##Action name() GTEST_MUST_USE_RESULT_; \ - inline name##Action name() { return name##Action(); } \ - template \ - return_type name##Action::gmock_Impl::gmock_PerformImpl( \ - GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const - -#define ACTION_P(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP, (__VA_ARGS__)) - -#define ACTION_P2(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP2, (__VA_ARGS__)) - -#define ACTION_P3(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP3, (__VA_ARGS__)) - -#define ACTION_P4(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP4, (__VA_ARGS__)) - -#define ACTION_P5(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP5, (__VA_ARGS__)) - -#define ACTION_P6(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP6, (__VA_ARGS__)) - -#define ACTION_P7(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP7, (__VA_ARGS__)) - -#define ACTION_P8(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP8, (__VA_ARGS__)) - -#define ACTION_P9(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP9, (__VA_ARGS__)) - -#define ACTION_P10(name, ...) \ - GMOCK_INTERNAL_ACTION(name, name##ActionP10, (__VA_ARGS__)) - -} // namespace testing - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ diff --git a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-cardinalities.h b/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-cardinalities.h deleted file mode 100644 index b6ab648e50a649257120e62fdc404e8e5ba2c1d9..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-cardinalities.h +++ /dev/null @@ -1,159 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Google Mock - a framework for writing C++ mock classes. -// -// This file implements some commonly used cardinalities. More -// cardinalities can be defined by the user implementing the -// CardinalityInterface interface if necessary. - -// IWYU pragma: private, include "gmock/gmock.h" -// IWYU pragma: friend gmock/.* - -#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ -#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ - -#include - -#include -#include // NOLINT - -#include "gmock/internal/gmock-port.h" -#include "gtest/gtest.h" - -GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ -/* class A needs to have dll-interface to be used by clients of class B */) - -namespace testing { - -// To implement a cardinality Foo, define: -// 1. a class FooCardinality that implements the -// CardinalityInterface interface, and -// 2. a factory function that creates a Cardinality object from a -// const FooCardinality*. -// -// The two-level delegation design follows that of Matcher, providing -// consistency for extension developers. It also eases ownership -// management as Cardinality objects can now be copied like plain values. - -// The implementation of a cardinality. -class CardinalityInterface { - public: - virtual ~CardinalityInterface() {} - - // Conservative estimate on the lower/upper bound of the number of - // calls allowed. - virtual int ConservativeLowerBound() const { return 0; } - virtual int ConservativeUpperBound() const { return INT_MAX; } - - // Returns true if and only if call_count calls will satisfy this - // cardinality. - virtual bool IsSatisfiedByCallCount(int call_count) const = 0; - - // Returns true if and only if call_count calls will saturate this - // cardinality. - virtual bool IsSaturatedByCallCount(int call_count) const = 0; - - // Describes self to an ostream. - virtual void DescribeTo(::std::ostream* os) const = 0; -}; - -// A Cardinality is a copyable and IMMUTABLE (except by assignment) -// object that specifies how many times a mock function is expected to -// be called. The implementation of Cardinality is just a std::shared_ptr -// to const CardinalityInterface. Don't inherit from Cardinality! -class GTEST_API_ Cardinality { - public: - // Constructs a null cardinality. Needed for storing Cardinality - // objects in STL containers. 
- Cardinality() {} - - // Constructs a Cardinality from its implementation. - explicit Cardinality(const CardinalityInterface* impl) : impl_(impl) {} - - // Conservative estimate on the lower/upper bound of the number of - // calls allowed. - int ConservativeLowerBound() const { return impl_->ConservativeLowerBound(); } - int ConservativeUpperBound() const { return impl_->ConservativeUpperBound(); } - - // Returns true if and only if call_count calls will satisfy this - // cardinality. - bool IsSatisfiedByCallCount(int call_count) const { - return impl_->IsSatisfiedByCallCount(call_count); - } - - // Returns true if and only if call_count calls will saturate this - // cardinality. - bool IsSaturatedByCallCount(int call_count) const { - return impl_->IsSaturatedByCallCount(call_count); - } - - // Returns true if and only if call_count calls will over-saturate this - // cardinality, i.e. exceed the maximum number of allowed calls. - bool IsOverSaturatedByCallCount(int call_count) const { - return impl_->IsSaturatedByCallCount(call_count) && - !impl_->IsSatisfiedByCallCount(call_count); - } - - // Describes self to an ostream - void DescribeTo(::std::ostream* os) const { impl_->DescribeTo(os); } - - // Describes the given actual call count to an ostream. - static void DescribeActualCallCountTo(int actual_call_count, - ::std::ostream* os); - - private: - std::shared_ptr impl_; -}; - -// Creates a cardinality that allows at least n calls. -GTEST_API_ Cardinality AtLeast(int n); - -// Creates a cardinality that allows at most n calls. -GTEST_API_ Cardinality AtMost(int n); - -// Creates a cardinality that allows any number of calls. -GTEST_API_ Cardinality AnyNumber(); - -// Creates a cardinality that allows between min and max calls. -GTEST_API_ Cardinality Between(int min, int max); - -// Creates a cardinality that allows exactly n calls. -GTEST_API_ Cardinality Exactly(int n); - -// Creates a cardinality from its implementation. -inline Cardinality MakeCardinality(const CardinalityInterface* c) { - return Cardinality(c); -} - -} // namespace testing - -GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 - -#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ diff --git a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-function-mocker.h b/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-function-mocker.h deleted file mode 100644 index 73065493b38b4fbe8bedaaeb690779769708cac1..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-function-mocker.h +++ /dev/null @@ -1,517 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
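The cardinality factories declared in the header deleted above (`AtLeast`, `AtMost`, `AnyNumber`, `Between`, `Exactly`) are consumed through `EXPECT_CALL(...).Times(...)`. A minimal sketch, purely for illustration and not part of the diff (`Logger`/`MockLogger` are hypothetical types):

```cpp
#include "gmock/gmock.h"
#include "gtest/gtest.h"

class Logger {
 public:
  virtual ~Logger() = default;
  virtual void Flush() = 0;
};

class MockLogger : public Logger {
 public:
  MOCK_METHOD(void, Flush, (), (override));
};

TEST(CardinalitySketch, BoundsOnCallCounts) {
  MockLogger logger;
  // The expectation is satisfied by 1 or 2 calls and saturated at 2;
  // a third call would over-saturate it and fail the test.
  EXPECT_CALL(logger, Flush()).Times(testing::Between(1, 2));
  logger.Flush();
  logger.Flush();
}
```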
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Google Mock - a framework for writing C++ mock classes. -// -// This file implements MOCK_METHOD. - -// IWYU pragma: private, include "gmock/gmock.h" -// IWYU pragma: friend gmock/.* - -#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_FUNCTION_MOCKER_H_ -#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_FUNCTION_MOCKER_H_ - -#include // IWYU pragma: keep -#include // IWYU pragma: keep - -#include "gmock/gmock-spec-builders.h" -#include "gmock/internal/gmock-internal-utils.h" -#include "gmock/internal/gmock-pp.h" - -namespace testing { -namespace internal { -template -using identity_t = T; - -template -struct ThisRefAdjuster { - template - using AdjustT = typename std::conditional< - std::is_const::type>::value, - typename std::conditional::value, - const T&, const T&&>::type, - typename std::conditional::value, T&, - T&&>::type>::type; - - template - static AdjustT Adjust(const MockType& mock) { - return static_cast>(const_cast(mock)); - } -}; - -constexpr bool PrefixOf(const char* a, const char* b) { - return *a == 0 || (*a == *b && internal::PrefixOf(a + 1, b + 1)); -} - -template -constexpr bool StartsWith(const char (&prefix)[N], const char (&str)[M]) { - return N <= M && internal::PrefixOf(prefix, str); -} - -template -constexpr bool EndsWith(const char (&suffix)[N], const char (&str)[M]) { - return N <= M && internal::PrefixOf(suffix, str + M - N); -} - -template -constexpr bool Equals(const char (&a)[N], const char (&b)[M]) { - return N == M && internal::PrefixOf(a, b); -} - -template -constexpr bool ValidateSpec(const char (&spec)[N]) { - return internal::Equals("const", spec) || - internal::Equals("override", spec) || - internal::Equals("final", spec) || - internal::Equals("noexcept", spec) || - (internal::StartsWith("noexcept(", spec) && - internal::EndsWith(")", spec)) || - internal::Equals("ref(&)", spec) || - internal::Equals("ref(&&)", spec) || - (internal::StartsWith("Calltype(", spec) && - internal::EndsWith(")", spec)); -} - -} // namespace internal - -// The style guide prohibits "using" statements in a namespace scope -// inside a header file. However, the FunctionMocker class template -// is meant to be defined in the ::testing namespace. The following -// line is just a trick for working around a bug in MSVC 8.0, which -// cannot handle it if we define FunctionMocker in ::testing. -using internal::FunctionMocker; -} // namespace testing - -#define MOCK_METHOD(...) \ - GMOCK_INTERNAL_WARNING_PUSH() \ - GMOCK_INTERNAL_WARNING_CLANG(ignored, "-Wunused-member-function") \ - GMOCK_PP_VARIADIC_CALL(GMOCK_INTERNAL_MOCK_METHOD_ARG_, __VA_ARGS__) \ - GMOCK_INTERNAL_WARNING_POP() - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_1(...) 
\ - GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_2(...) \ - GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_3(_Ret, _MethodName, _Args) \ - GMOCK_INTERNAL_MOCK_METHOD_ARG_4(_Ret, _MethodName, _Args, ()) - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_4(_Ret, _MethodName, _Args, _Spec) \ - GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Args); \ - GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Spec); \ - GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE( \ - GMOCK_PP_NARG0 _Args, GMOCK_INTERNAL_SIGNATURE(_Ret, _Args)); \ - GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \ - GMOCK_INTERNAL_MOCK_METHOD_IMPL( \ - GMOCK_PP_NARG0 _Args, _MethodName, GMOCK_INTERNAL_HAS_CONST(_Spec), \ - GMOCK_INTERNAL_HAS_OVERRIDE(_Spec), GMOCK_INTERNAL_HAS_FINAL(_Spec), \ - GMOCK_INTERNAL_GET_NOEXCEPT_SPEC(_Spec), \ - GMOCK_INTERNAL_GET_CALLTYPE_SPEC(_Spec), \ - GMOCK_INTERNAL_GET_REF_SPEC(_Spec), \ - (GMOCK_INTERNAL_SIGNATURE(_Ret, _Args))) - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_5(...) \ - GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_6(...) \ - GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) - -#define GMOCK_INTERNAL_MOCK_METHOD_ARG_7(...) \ - GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) - -#define GMOCK_INTERNAL_WRONG_ARITY(...) \ - static_assert( \ - false, \ - "MOCK_METHOD must be called with 3 or 4 arguments. _Ret, " \ - "_MethodName, _Args and optionally _Spec. _Args and _Spec must be " \ - "enclosed in parentheses. If _Ret is a type with unprotected commas, " \ - "it must also be enclosed in parentheses.") - -#define GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Tuple) \ - static_assert( \ - GMOCK_PP_IS_ENCLOSED_PARENS(_Tuple), \ - GMOCK_PP_STRINGIZE(_Tuple) " should be enclosed in parentheses.") - -#define GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE(_N, ...) \ - static_assert( \ - std::is_function<__VA_ARGS__>::value, \ - "Signature must be a function type, maybe return type contains " \ - "unprotected comma."); \ - static_assert( \ - ::testing::tuple_size::ArgumentTuple>::value == _N, \ - "This method does not take " GMOCK_PP_STRINGIZE( \ - _N) " arguments. 
Parenthesize all types with unprotected commas.") - -#define GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT, ~, _Spec) - -#define GMOCK_INTERNAL_MOCK_METHOD_IMPL(_N, _MethodName, _Constness, \ - _Override, _Final, _NoexceptSpec, \ - _CallType, _RefSpec, _Signature) \ - typename ::testing::internal::Function::Result \ - GMOCK_INTERNAL_EXPAND(_CallType) \ - _MethodName(GMOCK_PP_REPEAT(GMOCK_INTERNAL_PARAMETER, _Signature, _N)) \ - GMOCK_PP_IF(_Constness, const, ) _RefSpec _NoexceptSpec \ - GMOCK_PP_IF(_Override, override, ) GMOCK_PP_IF(_Final, final, ) { \ - GMOCK_MOCKER_(_N, _Constness, _MethodName) \ - .SetOwnerAndName(this, #_MethodName); \ - return GMOCK_MOCKER_(_N, _Constness, _MethodName) \ - .Invoke(GMOCK_PP_REPEAT(GMOCK_INTERNAL_FORWARD_ARG, _Signature, _N)); \ - } \ - ::testing::MockSpec gmock_##_MethodName( \ - GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_PARAMETER, _Signature, _N)) \ - GMOCK_PP_IF(_Constness, const, ) _RefSpec { \ - GMOCK_MOCKER_(_N, _Constness, _MethodName).RegisterOwner(this); \ - return GMOCK_MOCKER_(_N, _Constness, _MethodName) \ - .With(GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_ARGUMENT, , _N)); \ - } \ - ::testing::MockSpec gmock_##_MethodName( \ - const ::testing::internal::WithoutMatchers&, \ - GMOCK_PP_IF(_Constness, const, )::testing::internal::Function< \ - GMOCK_PP_REMOVE_PARENS(_Signature)>*) const _RefSpec _NoexceptSpec { \ - return ::testing::internal::ThisRefAdjuster::Adjust(*this) \ - .gmock_##_MethodName(GMOCK_PP_REPEAT( \ - GMOCK_INTERNAL_A_MATCHER_ARGUMENT, _Signature, _N)); \ - } \ - mutable ::testing::FunctionMocker \ - GMOCK_MOCKER_(_N, _Constness, _MethodName) - -#define GMOCK_INTERNAL_EXPAND(...) __VA_ARGS__ - -// Valid modifiers. -#define GMOCK_INTERNAL_HAS_CONST(_Tuple) \ - GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_CONST, ~, _Tuple)) - -#define GMOCK_INTERNAL_HAS_OVERRIDE(_Tuple) \ - GMOCK_PP_HAS_COMMA( \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_OVERRIDE, ~, _Tuple)) - -#define GMOCK_INTERNAL_HAS_FINAL(_Tuple) \ - GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_FINAL, ~, _Tuple)) - -#define GMOCK_INTERNAL_GET_NOEXCEPT_SPEC(_Tuple) \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_NOEXCEPT_SPEC_IF_NOEXCEPT, ~, _Tuple) - -#define GMOCK_INTERNAL_NOEXCEPT_SPEC_IF_NOEXCEPT(_i, _, _elem) \ - GMOCK_PP_IF( \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)), \ - _elem, ) - -#define GMOCK_INTERNAL_GET_CALLTYPE_SPEC(_Tuple) \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_CALLTYPE_SPEC_IF_CALLTYPE, ~, _Tuple) - -#define GMOCK_INTERNAL_CALLTYPE_SPEC_IF_CALLTYPE(_i, _, _elem) \ - GMOCK_PP_IF( \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CALLTYPE(_i, _, _elem)), \ - GMOCK_PP_CAT(GMOCK_INTERNAL_UNPACK_, _elem), ) - -#define GMOCK_INTERNAL_GET_REF_SPEC(_Tuple) \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_REF_SPEC_IF_REF, ~, _Tuple) - -#define GMOCK_INTERNAL_REF_SPEC_IF_REF(_i, _, _elem) \ - GMOCK_PP_IF(GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_REF(_i, _, _elem)), \ - GMOCK_PP_CAT(GMOCK_INTERNAL_UNPACK_, _elem), ) - -#ifdef GMOCK_INTERNAL_STRICT_SPEC_ASSERT -#define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \ - static_assert( \ - ::testing::internal::ValidateSpec(GMOCK_PP_STRINGIZE(_elem)), \ - "Token \'" GMOCK_PP_STRINGIZE( \ - _elem) "\' cannot be recognized as a valid specification " \ - "modifier. 
Is a ',' missing?"); -#else -#define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \ - static_assert( \ - (GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem)) + \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem)) + \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem)) + \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)) + \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_REF(_i, _, _elem)) + \ - GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CALLTYPE(_i, _, _elem))) == 1, \ - GMOCK_PP_STRINGIZE( \ - _elem) " cannot be recognized as a valid specification modifier."); -#endif // GMOCK_INTERNAL_STRICT_SPEC_ASSERT - -// Modifiers implementation. -#define GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem) \ - GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_CONST_I_, _elem) - -#define GMOCK_INTERNAL_DETECT_CONST_I_const , - -#define GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem) \ - GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_OVERRIDE_I_, _elem) - -#define GMOCK_INTERNAL_DETECT_OVERRIDE_I_override , - -#define GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem) \ - GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_FINAL_I_, _elem) - -#define GMOCK_INTERNAL_DETECT_FINAL_I_final , - -#define GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem) \ - GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_NOEXCEPT_I_, _elem) - -#define GMOCK_INTERNAL_DETECT_NOEXCEPT_I_noexcept , - -#define GMOCK_INTERNAL_DETECT_REF(_i, _, _elem) \ - GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_REF_I_, _elem) - -#define GMOCK_INTERNAL_DETECT_REF_I_ref , - -#define GMOCK_INTERNAL_UNPACK_ref(x) x - -#define GMOCK_INTERNAL_DETECT_CALLTYPE(_i, _, _elem) \ - GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_CALLTYPE_I_, _elem) - -#define GMOCK_INTERNAL_DETECT_CALLTYPE_I_Calltype , - -#define GMOCK_INTERNAL_UNPACK_Calltype(...) __VA_ARGS__ - -// Note: The use of `identity_t` here allows _Ret to represent return types that -// would normally need to be specified in a different way. For example, a method -// returning a function pointer must be written as -// -// fn_ptr_return_t (*method(method_args_t...))(fn_ptr_args_t...) -// -// But we only support placing the return type at the beginning. To handle this, -// we wrap all calls in identity_t, so that a declaration will be expanded to -// -// identity_t method(method_args_t...) -// -// This allows us to work around the syntactic oddities of function/method -// types. -#define GMOCK_INTERNAL_SIGNATURE(_Ret, _Args) \ - ::testing::internal::identity_t( \ - GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GET_TYPE, _, _Args)) - -#define GMOCK_INTERNAL_GET_TYPE(_i, _, _elem) \ - GMOCK_PP_COMMA_IF(_i) \ - GMOCK_PP_IF(GMOCK_PP_IS_BEGIN_PARENS(_elem), GMOCK_PP_REMOVE_PARENS, \ - GMOCK_PP_IDENTITY) \ - (_elem) - -#define GMOCK_INTERNAL_PARAMETER(_i, _Signature, _) \ - GMOCK_PP_COMMA_IF(_i) \ - GMOCK_INTERNAL_ARG_O(_i, GMOCK_PP_REMOVE_PARENS(_Signature)) \ - gmock_a##_i - -#define GMOCK_INTERNAL_FORWARD_ARG(_i, _Signature, _) \ - GMOCK_PP_COMMA_IF(_i) \ - ::std::forward(gmock_a##_i) - -#define GMOCK_INTERNAL_MATCHER_PARAMETER(_i, _Signature, _) \ - GMOCK_PP_COMMA_IF(_i) \ - GMOCK_INTERNAL_MATCHER_O(_i, GMOCK_PP_REMOVE_PARENS(_Signature)) \ - gmock_a##_i - -#define GMOCK_INTERNAL_MATCHER_ARGUMENT(_i, _1, _2) \ - GMOCK_PP_COMMA_IF(_i) \ - gmock_a##_i - -#define GMOCK_INTERNAL_A_MATCHER_ARGUMENT(_i, _Signature, _) \ - GMOCK_PP_COMMA_IF(_i) \ - ::testing::A() - -#define GMOCK_INTERNAL_ARG_O(_i, ...) \ - typename ::testing::internal::Function<__VA_ARGS__>::template Arg<_i>::type - -#define GMOCK_INTERNAL_MATCHER_O(_i, ...) 
\ - const ::testing::Matcher::template Arg<_i>::type>& - -#define MOCK_METHOD0(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 0, __VA_ARGS__) -#define MOCK_METHOD1(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 1, __VA_ARGS__) -#define MOCK_METHOD2(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 2, __VA_ARGS__) -#define MOCK_METHOD3(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 3, __VA_ARGS__) -#define MOCK_METHOD4(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 4, __VA_ARGS__) -#define MOCK_METHOD5(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 5, __VA_ARGS__) -#define MOCK_METHOD6(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 6, __VA_ARGS__) -#define MOCK_METHOD7(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 7, __VA_ARGS__) -#define MOCK_METHOD8(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 8, __VA_ARGS__) -#define MOCK_METHOD9(m, ...) GMOCK_INTERNAL_MOCK_METHODN(, , m, 9, __VA_ARGS__) -#define MOCK_METHOD10(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, , m, 10, __VA_ARGS__) - -#define MOCK_CONST_METHOD0(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 0, __VA_ARGS__) -#define MOCK_CONST_METHOD1(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 1, __VA_ARGS__) -#define MOCK_CONST_METHOD2(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 2, __VA_ARGS__) -#define MOCK_CONST_METHOD3(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 3, __VA_ARGS__) -#define MOCK_CONST_METHOD4(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 4, __VA_ARGS__) -#define MOCK_CONST_METHOD5(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 5, __VA_ARGS__) -#define MOCK_CONST_METHOD6(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 6, __VA_ARGS__) -#define MOCK_CONST_METHOD7(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 7, __VA_ARGS__) -#define MOCK_CONST_METHOD8(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 8, __VA_ARGS__) -#define MOCK_CONST_METHOD9(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 9, __VA_ARGS__) -#define MOCK_CONST_METHOD10(m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, , m, 10, __VA_ARGS__) - -#define MOCK_METHOD0_T(m, ...) MOCK_METHOD0(m, __VA_ARGS__) -#define MOCK_METHOD1_T(m, ...) MOCK_METHOD1(m, __VA_ARGS__) -#define MOCK_METHOD2_T(m, ...) MOCK_METHOD2(m, __VA_ARGS__) -#define MOCK_METHOD3_T(m, ...) MOCK_METHOD3(m, __VA_ARGS__) -#define MOCK_METHOD4_T(m, ...) MOCK_METHOD4(m, __VA_ARGS__) -#define MOCK_METHOD5_T(m, ...) MOCK_METHOD5(m, __VA_ARGS__) -#define MOCK_METHOD6_T(m, ...) MOCK_METHOD6(m, __VA_ARGS__) -#define MOCK_METHOD7_T(m, ...) MOCK_METHOD7(m, __VA_ARGS__) -#define MOCK_METHOD8_T(m, ...) MOCK_METHOD8(m, __VA_ARGS__) -#define MOCK_METHOD9_T(m, ...) MOCK_METHOD9(m, __VA_ARGS__) -#define MOCK_METHOD10_T(m, ...) MOCK_METHOD10(m, __VA_ARGS__) - -#define MOCK_CONST_METHOD0_T(m, ...) MOCK_CONST_METHOD0(m, __VA_ARGS__) -#define MOCK_CONST_METHOD1_T(m, ...) MOCK_CONST_METHOD1(m, __VA_ARGS__) -#define MOCK_CONST_METHOD2_T(m, ...) MOCK_CONST_METHOD2(m, __VA_ARGS__) -#define MOCK_CONST_METHOD3_T(m, ...) MOCK_CONST_METHOD3(m, __VA_ARGS__) -#define MOCK_CONST_METHOD4_T(m, ...) MOCK_CONST_METHOD4(m, __VA_ARGS__) -#define MOCK_CONST_METHOD5_T(m, ...) MOCK_CONST_METHOD5(m, __VA_ARGS__) -#define MOCK_CONST_METHOD6_T(m, ...) MOCK_CONST_METHOD6(m, __VA_ARGS__) -#define MOCK_CONST_METHOD7_T(m, ...) MOCK_CONST_METHOD7(m, __VA_ARGS__) -#define MOCK_CONST_METHOD8_T(m, ...) MOCK_CONST_METHOD8(m, __VA_ARGS__) -#define MOCK_CONST_METHOD9_T(m, ...) MOCK_CONST_METHOD9(m, __VA_ARGS__) -#define MOCK_CONST_METHOD10_T(m, ...) MOCK_CONST_METHOD10(m, __VA_ARGS__) - -#define MOCK_METHOD0_WITH_CALLTYPE(ct, m, ...) 
\ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 0, __VA_ARGS__) -#define MOCK_METHOD1_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 1, __VA_ARGS__) -#define MOCK_METHOD2_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 2, __VA_ARGS__) -#define MOCK_METHOD3_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 3, __VA_ARGS__) -#define MOCK_METHOD4_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 4, __VA_ARGS__) -#define MOCK_METHOD5_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 5, __VA_ARGS__) -#define MOCK_METHOD6_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 6, __VA_ARGS__) -#define MOCK_METHOD7_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 7, __VA_ARGS__) -#define MOCK_METHOD8_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 8, __VA_ARGS__) -#define MOCK_METHOD9_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 9, __VA_ARGS__) -#define MOCK_METHOD10_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(, ct, m, 10, __VA_ARGS__) - -#define MOCK_CONST_METHOD0_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 0, __VA_ARGS__) -#define MOCK_CONST_METHOD1_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 1, __VA_ARGS__) -#define MOCK_CONST_METHOD2_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 2, __VA_ARGS__) -#define MOCK_CONST_METHOD3_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 3, __VA_ARGS__) -#define MOCK_CONST_METHOD4_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 4, __VA_ARGS__) -#define MOCK_CONST_METHOD5_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 5, __VA_ARGS__) -#define MOCK_CONST_METHOD6_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 6, __VA_ARGS__) -#define MOCK_CONST_METHOD7_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 7, __VA_ARGS__) -#define MOCK_CONST_METHOD8_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 8, __VA_ARGS__) -#define MOCK_CONST_METHOD9_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 9, __VA_ARGS__) -#define MOCK_CONST_METHOD10_WITH_CALLTYPE(ct, m, ...) \ - GMOCK_INTERNAL_MOCK_METHODN(const, ct, m, 10, __VA_ARGS__) - -#define MOCK_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD0_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD1_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD2_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD3_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD4_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD5_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD6_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD7_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD8_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_METHOD9_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_METHOD10_T_WITH_CALLTYPE(ct, m, ...) 
\ - MOCK_METHOD10_WITH_CALLTYPE(ct, m, __VA_ARGS__) - -#define MOCK_CONST_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD0_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD1_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD2_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD3_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD4_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD5_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD6_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD7_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD8_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD9_WITH_CALLTYPE(ct, m, __VA_ARGS__) -#define MOCK_CONST_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ - MOCK_CONST_METHOD10_WITH_CALLTYPE(ct, m, __VA_ARGS__) - -#define GMOCK_INTERNAL_MOCK_METHODN(constness, ct, Method, args_num, ...) \ - GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE( \ - args_num, ::testing::internal::identity_t<__VA_ARGS__>); \ - GMOCK_INTERNAL_MOCK_METHOD_IMPL( \ - args_num, Method, GMOCK_PP_NARG0(constness), 0, 0, , ct, , \ - (::testing::internal::identity_t<__VA_ARGS__>)) - -#define GMOCK_MOCKER_(arity, constness, Method) \ - GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) - -#endif // GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_FUNCTION_MOCKER_H_ diff --git a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-matchers.h b/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-matchers.h deleted file mode 100644 index 9e634f7f1c47b267c2e2ddf5392445cb393ab669..0000000000000000000000000000000000000000 --- a/3rdparty/googletest-1.13.0/googlemock/include/gmock/gmock-matchers.h +++ /dev/null @@ -1,5620 +0,0 @@ -// Copyright 2007, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
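The file deleted above implements `MOCK_METHOD` as well as the legacy `MOCK_METHODn`/`MOCK_CONST_METHODn` family. For orientation only (not part of the deleted sources; `KeyValueStore` is a made-up interface), the modern macro takes the return type, the method name, the parenthesized argument list, and an optional tuple of specifiers such as `const` and `override`:

```cpp
#include <string>

#include "gmock/gmock.h"

class KeyValueStore {
 public:
  virtual ~KeyValueStore() = default;
  virtual bool Put(const std::string& key, const std::string& value) = 0;
  virtual std::string Get(const std::string& key) const = 0;
};

class MockKeyValueStore : public KeyValueStore {
 public:
  // (return type, name, (argument types), (specifiers))
  MOCK_METHOD(bool, Put, (const std::string& key, const std::string& value),
              (override));
  // 'const' must appear in the specifier tuple when mocking a const method.
  MOCK_METHOD(std::string, Get, (const std::string& key), (const, override));
};
```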
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Google Mock - a framework for writing C++ mock classes. -// -// The MATCHER* family of macros can be used in a namespace scope to -// define custom matchers easily. -// -// Basic Usage -// =========== -// -// The syntax -// -// MATCHER(name, description_string) { statements; } -// -// defines a matcher with the given name that executes the statements, -// which must return a bool to indicate if the match succeeds. Inside -// the statements, you can refer to the value being matched by 'arg', -// and refer to its type by 'arg_type'. -// -// The description string documents what the matcher does, and is used -// to generate the failure message when the match fails. Since a -// MATCHER() is usually defined in a header file shared by multiple -// C++ source files, we require the description to be a C-string -// literal to avoid possible side effects. It can be empty, in which -// case we'll use the sequence of words in the matcher name as the -// description. -// -// For example: -// -// MATCHER(IsEven, "") { return (arg % 2) == 0; } -// -// allows you to write -// -// // Expects mock_foo.Bar(n) to be called where n is even. -// EXPECT_CALL(mock_foo, Bar(IsEven())); -// -// or, -// -// // Verifies that the value of some_expression is even. -// EXPECT_THAT(some_expression, IsEven()); -// -// If the above assertion fails, it will print something like: -// -// Value of: some_expression -// Expected: is even -// Actual: 7 -// -// where the description "is even" is automatically calculated from the -// matcher name IsEven. -// -// Argument Type -// ============= -// -// Note that the type of the value being matched (arg_type) is -// determined by the context in which you use the matcher and is -// supplied to you by the compiler, so you don't need to worry about -// declaring it (nor can you). This allows the matcher to be -// polymorphic. For example, IsEven() can be used to match any type -// where the value of "(arg % 2) == 0" can be implicitly converted to -// a bool. In the "Bar(IsEven())" example above, if method Bar() -// takes an int, 'arg_type' will be int; if it takes an unsigned long, -// 'arg_type' will be unsigned long; and so on. -// -// Parameterizing Matchers -// ======================= -// -// Sometimes you'll want to parameterize the matcher. For that you -// can use another macro: -// -// MATCHER_P(name, param_name, description_string) { statements; } -// -// For example: -// -// MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } -// -// will allow you to write: -// -// EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); -// -// which may lead to this message (assuming n is 10): -// -// Value of: Blah("a") -// Expected: has absolute value 10 -// Actual: -9 -// -// Note that both the matcher description and its parameter are -// printed, making the message human-friendly. -// -// In the matcher definition body, you can write 'foo_type' to -// reference the type of a parameter named 'foo'. 
For example, in the -// body of MATCHER_P(HasAbsoluteValue, value) above, you can write -// 'value_type' to refer to the type of 'value'. -// -// We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P$n to -// support multi-parameter matchers. -// -// Describing Parameterized Matchers -// ================================= -// -// The last argument to MATCHER*() is a string-typed expression. The -// expression can reference all of the matcher's parameters and a -// special bool-typed variable named 'negation'. When 'negation' is -// false, the expression should evaluate to the matcher's description; -// otherwise it should evaluate to the description of the negation of -// the matcher. For example, -// -// using testing::PrintToString; -// -// MATCHER_P2(InClosedRange, low, hi, -// std::string(negation ? "is not" : "is") + " in range [" + -// PrintToString(low) + ", " + PrintToString(hi) + "]") { -// return low <= arg && arg <= hi; -// } -// ... -// EXPECT_THAT(3, InClosedRange(4, 6)); -// EXPECT_THAT(3, Not(InClosedRange(2, 4))); -// -// would generate two failures that contain the text: -// -// Expected: is in range [4, 6] -// ... -// Expected: is not in range [2, 4] -// -// If you specify "" as the description, the failure message will -// contain the sequence of words in the matcher name followed by the -// parameter values printed as a tuple. For example, -// -// MATCHER_P2(InClosedRange, low, hi, "") { ... } -// ... -// EXPECT_THAT(3, InClosedRange(4, 6)); -// EXPECT_THAT(3, Not(InClosedRange(2, 4))); -// -// would generate two failures that contain the text: -// -// Expected: in closed range (4, 6) -// ... -// Expected: not (in closed range (2, 4)) -// -// Types of Matcher Parameters -// =========================== -// -// For the purpose of typing, you can view -// -// MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } -// -// as shorthand for -// -// template -// FooMatcherPk -// Foo(p1_type p1, ..., pk_type pk) { ... } -// -// When you write Foo(v1, ..., vk), the compiler infers the types of -// the parameters v1, ..., and vk for you. If you are not happy with -// the result of the type inference, you can specify the types by -// explicitly instantiating the template, as in Foo(5, -// false). As said earlier, you don't get to (or need to) specify -// 'arg_type' as that's determined by the context in which the matcher -// is used. You can assign the result of expression Foo(p1, ..., pk) -// to a variable of type FooMatcherPk. This -// can be useful when composing matchers. -// -// While you can instantiate a matcher template with reference types, -// passing the parameters by pointer usually makes your code more -// readable. If, however, you still want to pass a parameter by -// reference, be aware that in the failure message generated by the -// matcher you will see the value of the referenced object but not its -// address. -// -// Explaining Match Results -// ======================== -// -// Sometimes the matcher description alone isn't enough to explain why -// the match has failed or succeeded. For example, when expecting a -// long string, it can be very helpful to also print the diff between -// the expected string and the actual one. 
To achieve that, you can -// optionally stream additional information to a special variable -// named result_listener, whose type is a pointer to class -// MatchResultListener: -// -// MATCHER_P(EqualsLongString, str, "") { -// if (arg == str) return true; -// -// *result_listener << "the difference: " -/// << DiffStrings(str, arg); -// return false; -// } -// -// Overloading Matchers -// ==================== -// -// You can overload matchers with different numbers of parameters: -// -// MATCHER_P(Blah, a, description_string1) { ... } -// MATCHER_P2(Blah, a, b, description_string2) { ... } -// -// Caveats -// ======= -// -// When defining a new matcher, you should also consider implementing -// MatcherInterface or using MakePolymorphicMatcher(). These -// approaches require more work than the MATCHER* macros, but also -// give you more control on the types of the value being matched and -// the matcher parameters, which may leads to better compiler error -// messages when the matcher is used wrong. They also allow -// overloading matchers based on parameter types (as opposed to just -// based on the number of parameters). -// -// MATCHER*() can only be used in a namespace scope as templates cannot be -// declared inside of a local class. -// -// More Information -// ================ -// -// To learn more about using these macros, please search for 'MATCHER' -// on -// https://github.com/google/googletest/blob/main/docs/gmock_cook_book.md -// -// This file also implements some commonly used argument matchers. More -// matchers can be defined by the user implementing the -// MatcherInterface interface if necessary. -// -// See googletest/include/gtest/gtest-matchers.h for the definition of class -// Matcher, class MatcherInterface, and others. - -// IWYU pragma: private, include "gmock/gmock.h" -// IWYU pragma: friend gmock/.* - -#ifndef GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ -#define GOOGLEMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ - -#include -#include -#include -#include -#include -#include -#include -#include // NOLINT -#include -#include -#include -#include -#include - -#include "gmock/internal/gmock-internal-utils.h" -#include "gmock/internal/gmock-port.h" -#include "gmock/internal/gmock-pp.h" -#include "gtest/gtest.h" - -// MSVC warning C5046 is new as of VS2017 version 15.8. -#if defined(_MSC_VER) && _MSC_VER >= 1915 -#define GMOCK_MAYBE_5046_ 5046 -#else -#define GMOCK_MAYBE_5046_ -#endif - -GTEST_DISABLE_MSC_WARNINGS_PUSH_( - 4251 GMOCK_MAYBE_5046_ /* class A needs to have dll-interface to be used by - clients of class B */ - /* Symbol involving type with internal linkage not defined */) - -namespace testing { - -// To implement a matcher Foo for type T, define: -// 1. a class FooMatcherImpl that implements the -// MatcherInterface interface, and -// 2. a factory function that creates a Matcher object from a -// FooMatcherImpl*. -// -// The two-level delegation design makes it possible to allow a user -// to write "v" instead of "Eq(v)" where a Matcher is expected, which -// is impossible if we pass matchers by pointers. It also eases -// ownership management as Matcher objects can now be copied like -// plain values. - -// A match result listener that stores the explanation in a string. -class StringMatchResultListener : public MatchResultListener { - public: - StringMatchResultListener() : MatchResultListener(&ss_) {} - - // Returns the explanation accumulated so far. - std::string str() const { return ss_.str(); } - - // Clears the explanation accumulated so far. 
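The `MATCHER*` documentation deleted above covers parameterized matchers, negation-aware description strings, and streaming extra detail to `result_listener`. A small sketch pulling those pieces together (illustrative only; `IsDivisibleBy` is a made-up matcher, not part of the diff):

```cpp
#include <string>

#include "gmock/gmock.h"
#include "gtest/gtest.h"

using ::testing::PrintToString;

// The third argument is the description; 'negation' selects between the
// positive and negated wording, and the bound parameter 'n' is printable.
MATCHER_P(IsDivisibleBy, n,
          std::string(negation ? "isn't" : "is") + " divisible by " +
              PrintToString(n)) {
  if ((arg % n) == 0) return true;
  // Extra explanation shown in the failure message.
  *result_listener << "the remainder is " << (arg % n);
  return false;
}

TEST(MatcherMacroSketch, DivisibilityIsExplained) {
  EXPECT_THAT(9, IsDivisibleBy(3));
  EXPECT_THAT(10, testing::Not(IsDivisibleBy(3)));
}
```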
- void Clear() { ss_.str(""); } - - private: - ::std::stringstream ss_; - - StringMatchResultListener(const StringMatchResultListener&) = delete; - StringMatchResultListener& operator=(const StringMatchResultListener&) = - delete; -}; - -// Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION -// and MUST NOT BE USED IN USER CODE!!! -namespace internal { - -// The MatcherCastImpl class template is a helper for implementing -// MatcherCast(). We need this helper in order to partially -// specialize the implementation of MatcherCast() (C++ allows -// class/struct templates to be partially specialized, but not -// function templates.). - -// This general version is used when MatcherCast()'s argument is a -// polymorphic matcher (i.e. something that can be converted to a -// Matcher but is not one yet; for example, Eq(value)) or a value (for -// example, "hello"). -template -class MatcherCastImpl { - public: - static Matcher Cast(const M& polymorphic_matcher_or_value) { - // M can be a polymorphic matcher, in which case we want to use - // its conversion operator to create Matcher. Or it can be a value - // that should be passed to the Matcher's constructor. - // - // We can't call Matcher(polymorphic_matcher_or_value) when M is a - // polymorphic matcher because it'll be ambiguous if T has an implicit - // constructor from M (this usually happens when T has an implicit - // constructor from any type). - // - // It won't work to unconditionally implicit_cast - // polymorphic_matcher_or_value to Matcher because it won't trigger - // a user-defined conversion from M to T if one exists (assuming M is - // a value). - return CastImpl(polymorphic_matcher_or_value, - std::is_convertible>{}, - std::is_convertible{}); - } - - private: - template - static Matcher CastImpl(const M& polymorphic_matcher_or_value, - std::true_type /* convertible_to_matcher */, - std::integral_constant) { - // M is implicitly convertible to Matcher, which means that either - // M is a polymorphic matcher or Matcher has an implicit constructor - // from M. In both cases using the implicit conversion will produce a - // matcher. - // - // Even if T has an implicit constructor from M, it won't be called because - // creating Matcher would require a chain of two user-defined conversions - // (first to create T from M and then to create Matcher from T). - return polymorphic_matcher_or_value; - } - - // M can't be implicitly converted to Matcher, so M isn't a polymorphic - // matcher. It's a value of a type implicitly convertible to T. Use direct - // initialization to create a matcher. - static Matcher CastImpl(const M& value, - std::false_type /* convertible_to_matcher */, - std::true_type /* convertible_to_T */) { - return Matcher(ImplicitCast_(value)); - } - - // M can't be implicitly converted to either Matcher or T. Attempt to use - // polymorphic matcher Eq(value) in this case. - // - // Note that we first attempt to perform an implicit cast on the value and - // only fall back to the polymorphic Eq() matcher afterwards because the - // latter calls bool operator==(const Lhs& lhs, const Rhs& rhs) in the end - // which might be undefined even when Rhs is implicitly convertible to Lhs - // (e.g. std::pair vs. std::pair). - // - // We don't define this method inline as we need the declaration of Eq(). 
- static Matcher CastImpl(const M& value, - std::false_type /* convertible_to_matcher */, - std::false_type /* convertible_to_T */); -}; - -// This more specialized version is used when MatcherCast()'s argument -// is already a Matcher. This only compiles when type T can be -// statically converted to type U. -template -class MatcherCastImpl> { - public: - static Matcher Cast(const Matcher& source_matcher) { - return Matcher(new Impl(source_matcher)); - } - - private: - class Impl : public MatcherInterface { - public: - explicit Impl(const Matcher& source_matcher) - : source_matcher_(source_matcher) {} - - // We delegate the matching logic to the source matcher. - bool MatchAndExplain(T x, MatchResultListener* listener) const override { - using FromType = typename std::remove_cv::type>::type>::type; - using ToType = typename std::remove_cv::type>::type>::type; - // Do not allow implicitly converting base*/& to derived*/&. - static_assert( - // Do not trigger if only one of them is a pointer. That implies a - // regular conversion and not a down_cast. - (std::is_pointer::type>::value != - std::is_pointer::type>::value) || - std::is_same::value || - !std::is_base_of::value, - "Can't implicitly convert from to "); - - // Do the cast to `U` explicitly if necessary. - // Otherwise, let implicit conversions do the trick. - using CastType = - typename std::conditional::value, - T&, U>::type; - - return source_matcher_.MatchAndExplain(static_cast(x), - listener); - } - - void DescribeTo(::std::ostream* os) const override { - source_matcher_.DescribeTo(os); - } - - void DescribeNegationTo(::std::ostream* os) const override { - source_matcher_.DescribeNegationTo(os); - } - - private: - const Matcher source_matcher_; - }; -}; - -// This even more specialized version is used for efficiently casting -// a matcher to its own type. -template -class MatcherCastImpl> { - public: - static Matcher Cast(const Matcher& matcher) { return matcher; } -}; - -// Template specialization for parameterless Matcher. -template -class MatcherBaseImpl { - public: - MatcherBaseImpl() = default; - - template - operator ::testing::Matcher() const { // NOLINT(runtime/explicit) - return ::testing::Matcher(new - typename Derived::template gmock_Impl()); - } -}; - -// Template specialization for Matcher with parameters. -template
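The `MatcherCastImpl` machinery above backs the public `MatcherCast<T>()` and `SafeMatcherCast<T>()` functions, which turn a value, a polymorphic matcher, or a `Matcher<U>` into a `Matcher<T>`. A usage sketch for orientation only (not part of the deleted header):

```cpp
#include "gmock/gmock.h"
#include "gtest/gtest.h"

using ::testing::Gt;
using ::testing::Matcher;
using ::testing::MatcherCast;
using ::testing::SafeMatcherCast;

TEST(MatcherCastSketch, ConvertsValuesAndMatchers) {
  // A plain value is wrapped into an equality matcher of the target type.
  Matcher<int> eq_five = MatcherCast<int>(5);
  EXPECT_TRUE(eq_five.Matches(5));

  // A polymorphic matcher (Gt) is converted into a monomorphic Matcher<int>.
  Matcher<int> gt_ten = MatcherCast<int>(Gt(10));
  EXPECT_TRUE(gt_ten.Matches(11));

  // SafeMatcherCast additionally checks at compile time that the implicit
  // conversion from the new argument type to the old one is lossless;
  // char -> int qualifies, so a Matcher<int> can be reused for char values.
  Matcher<char> char_matcher = SafeMatcherCast<char>(gt_ten);
  EXPECT_TRUE(char_matcher.Matches(static_cast<char>(20)));
}
```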